Adding AR support for C3: use DSP FFT and integer math (#4750)

* fixed problem on S3: turns out to be mem alignment * fixed scaling for C3 * code cleanup, added arduinoFFT back in, added ifdefs & description; also added major peak and frequency bin calculation for DSP FFT * moved ifdefs to correct place, separated sample filter & FFT filter application: post-FFT band pass and IIR applied to samples are now separated. I found in testing that applying the sample filter helps with aliasing into base-bands, so there is no need to hard-cut the lowest frequencies after FFT. * changed sample low pass cutoff from 80Hz to 90Hz: better anti-aliasing at minimal loss of base frequency. * code cleanup and minor speed improvement - moved scaling of FFT values into fftAddAvg() to use fewer operations - added "using" for math types, removing some ifdefs and duplications
2026-04-25 16:42:44 +00:00 · 2026-03-29 13:18:26 +02:00
parent 4a6ff64519
commit 42844e4fa8
3 changed files with 291 additions and 65 deletions
--- a/platformio.ini
+++ b/platformio.ini
@@ -542,6 +542,7 @@ platform_packages = ${esp32c3.platform_packages}
 framework = arduino
 board = esp32-c3-devkitm-1
 board_build.partitions = ${esp32.default_partitions}
+custom_usermods = audioreactive
 build_flags = ${common.build_flags} ${esp32c3.build_flags} -D WLED_RELEASE_NAME=\"ESP32-C3\"
  -D WLED_WATCHDOG_TIMEOUT=0
  -DLOLIN_WIFI_FIX ; seems to work much better with this
--- a/usermods/audioreactive/audio_reactive.cpp
+++ b/usermods/audioreactive/audio_reactive.cpp
@@ -20,6 +20,26 @@
 * ....
 */

+#define FFT_PREFER_EXACT_PEAKS  // use Blackman-Harris FFT windowing instead of Flat Top -> results in "sharper" peaks and less "leaking" into other frequencies (credits to @softhack)
+
+/*
+ * Note on FFT variants:
+ * - ArduinoFFT: uses floating point calculations, very slow on S2 and C3 (no FPU)
+ * - ESP-IDF DSP library:
+ *   - faster but uses ~13k of extra flash on ESP32 and S3
+ *   - uses integer math on S2 and C3: slightly less accurate but over 10x faster than ArduinoFFT and uses less flash
+ *   - not available in IDF < 4.4
+ * - ArduinoFFT is used by default on ESP32 and S3
+ * - ESP-IDF DSP FFT with integer math is used by default on S2 and C3
+ * - defines:
+ *   - UM_AUDIOREACTIVE_USE_ARDUINO_FFT: use ArduinoFFT library for FFT
+ *   - UM_AUDIOREACTIVE_USE_ESPDSP_FFT:  use ESP-IDF DSP for FFT
+ */
+
+//#define UM_AUDIOREACTIVE_USE_ESPDSP_FFT  // default on S2 and C3
+//#define UM_AUDIOREACTIVE_USE_INTEGER_FFT // use integer FFT if using ESP-IDF DSP library, always used on S2 and C3 (UM_AUDIOREACTIVE_USE_ARDUINO_FFT takes priority)
+//#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // default on ESP32 and S3
+
 #if !defined(FFTTASK_PRIORITY)
 #define FFTTASK_PRIORITY 1 // standard: looptask prio
 //#define FFTTASK_PRIORITY 2 // above looptask, below asyc_tcp
@@ -99,6 +119,46 @@ static uint8_t maxVol = 31;          // (was 10) Reasonable value for constant v
 static uint8_t binNum = 8;           // Used to select the bin for FFT based beat detection  (deprecated)

 #ifdef ARDUINO_ARCH_ESP32
+#if !defined(UM_AUDIOREACTIVE_USE_ESPDSP_FFT) && !defined(UM_AUDIOREACTIVE_USE_ARDUINO_FFT) && (defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32))
+#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // use ArduinoFFT library for FFT instead of ESP-IDF DSP library by default on ESP32 and S3
+#endif
+
+#if !defined(UM_AUDIOREACTIVE_USE_ARDUINO_FFT) && (ESP_IDF_VERSION < ESP_IDF_VERSION_VAL(4, 4, 0))
+#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // DSP FFT library is not available in ESP-IDF < 4.4 (guarded: the macro may already be set, e.g. via -D build flag)
+#endif
+
+#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
+#include <arduinoFFT.h> // ArduinoFFT library for FFT and window functions
+#undef UM_AUDIOREACTIVE_USE_INTEGER_FFT // ArduinoFFT has no integer support, so the float types below are used
+#else
+#include "dsps_fft2r.h" // ESP-IDF DSP library for FFT and window functions
+#ifdef FFT_PREFER_EXACT_PEAKS
+#include "dsps_wind_blackman_harris.h"
+#else
+#include "dsps_wind_flat_top.h"
+#endif
+#if (defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)) && !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+#define UM_AUDIOREACTIVE_USE_INTEGER_FFT // always use integer FFT on ESP32-S2 and ESP32-C3 (guarded to avoid redefining a user-supplied define)
+#endif
+#endif
+
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+using FFTsampleType = float;   // type of one sample / one FFT result bin
+using FFTmathType = float;     // type used for sums over samples or bins
+#define FFTabs fabsf
+#else
+using FFTsampleType = int16_t; // type of one sample / one FFT result bin
+using FFTmathType = int32_t;   // wider type used for sums over samples or bins
+#define FFTabs abs
+#endif
+// FFT buffer: receives the input samples and is overwritten in place with the computed FFT results.
+static FFTsampleType* valFFT = nullptr;
+#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
+static float* vImag = nullptr; // imaginary part of FFT results
+#endif
+
+// pre-computed window function
+static FFTsampleType* windowFFT = nullptr;

 // use audio source class (ESP32 specific)
 #include "audio_source.h"
@@ -108,14 +168,14 @@ constexpr int BLOCK_SIZE = 128;                  // I2S buffer size (samples)
 // globals
 static uint8_t inputLevel = 128;              // UI slider value
 #ifndef SR_SQUELCH
-  uint8_t soundSquelch = 10;                  // squelch value for volume reactive routines (config value)
+  static uint8_t soundSquelch = 10;                  // squelch value for volume reactive routines (config value)
 #else
-  uint8_t soundSquelch = SR_SQUELCH;          // squelch value for volume reactive routines (config value)
+  static uint8_t soundSquelch = SR_SQUELCH;          // squelch value for volume reactive routines (config value)
 #endif
 #ifndef SR_GAIN
-  uint8_t sampleGain = 60;                    // sample gain (config value)
+  static uint8_t sampleGain = 60;                    // sample gain (config value)
 #else
-  uint8_t sampleGain = SR_GAIN;               // sample gain (config value)
+  static uint8_t sampleGain = SR_GAIN;               // sample gain (config value)
 #endif
 // user settable options for FFTResult scaling
 static uint8_t FFTScalingMode = 3;            // 0 none; 1 optimized logarithmic; 2 optimized linear; 3 optimized square root
@@ -140,8 +200,8 @@ const float agcSampleSmooth[AGC_NUM_PRESETS]  = {  1/12.f,   1/6.f,  1/16.f}; //
 // AGC presets end

 static AudioSource *audioSource = nullptr;
-static bool useBandPassFilter = false;                    // if true, enables a bandpass filter 80Hz-16Khz to remove noise. Applies before FFT.
-
+static bool useBandPassFilter = false;                    // if true, enables a hard cutoff bandpass filter (lowest FFT bins are skipped in the channel mapping). Applies after FFT.
+static bool useMicFilter = false;                         // if true, enables a band-pass filter (90Hz-20kHz) on the raw samples to remove noise. Applies before FFT.
 ////////////////////
 // Begin FFT Code //
 ////////////////////
@@ -149,7 +209,7 @@ static bool useBandPassFilter = false;                    // if true, enables a
 // some prototypes, to ensure consistent interfaces
 static float fftAddAvg(int from, int to);   // average of several FFT result bins
 void FFTcode(void * parameter);      // audio processing task: read samples, run FFT, fill GEQ channels from FFT results
-static void runMicFilter(uint16_t numSamples, float *sampleBuffer);          // pre-filtering of raw samples (band-pass)
+static void runMicFilter(uint16_t numSamples, FFTsampleType *sampleBuffer);
 static void postProcessFFTResults(bool noiseGateOpen, int numberOfChannels); // post-processing and post-amp of GEQ channels

 static TaskHandle_t FFT_Task = nullptr;
@@ -185,13 +245,13 @@ constexpr uint16_t samplesFFT = 512;            // Samples in an FFT batch - Thi
 constexpr uint16_t samplesFFT_2 = 256;          // meaningfull part of FFT results - only the "lower half" contains useful information.
 // the following are observed values, supported by a bit of "educated guessing"
 //#define FFT_DOWNSCALE 0.65f                             // 20kHz - downscaling factor for FFT results - "Flat-Top" window @20Khz, old freq channels 
+#ifdef FFT_PREFER_EXACT_PEAKS
+#define FFT_DOWNSCALE 0.40f                             // downscaling factor for FFT results, RMS averaging for "Blackman-Harris" Window @22kHz (credit to MM)
+#else
 #define FFT_DOWNSCALE 0.46f                             // downscaling factor for FFT results - for "Flat-Top" window @22Khz, new freq channels
+#endif
 #define LOG_256  5.54517744f                            // log(256)

-// These are the input and output vectors.  Input vectors receive computed results from FFT.
-static float* vReal = nullptr;                  // FFT sample inputs / freq output -  these are our raw result bins
-static float* vImag = nullptr;                  // imaginary parts
-
 // Create FFT object
 // lib_deps += https://github.com/kosme/arduinoFFT#develop @ 1.9.2
 // these options actually cause slow-downs on all esp32 processors, don't use them.
@@ -200,16 +260,20 @@ static float* vImag = nullptr;                  // imaginary parts
 // Below options are forcing ArduinoFFT to use sqrtf() instead of sqrt()
 // #define sqrt_internal sqrtf          // see https://github.com/kosme/arduinoFFT/pull/83 - since v2.0.0 this must be done in build_flags

-#include <arduinoFFT.h>             // FFT object is created in FFTcode
 // Helper functions

 // compute average of several FFT result bins
 static float fftAddAvg(int from, int to) {
-  float result = 0.0f;
+  FFTmathType result = 0; // sum of bins [from..to] (inclusive); callers must pass from <= to
  for (int i = from; i <= to; i++) {
-    result += vReal[i];
+    result += valFFT[i];
  }
-  return result / float(to - from + 1);
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+  result *= 0.0625f; // divide by 16 to reduce magnitude (float literal keeps the math in single precision). Want end result to be scaled linear and ~4096 max.
+#else
+  result *= 32; // scale result to match float values. note: raw scaling value between float and int is 512, float version is scaled down by 16
+#endif
+  return float(result) / float(to - from + 1); // return average as float
 }

 //
@@ -218,18 +282,61 @@ static float fftAddAvg(int from, int to) {
 void FFTcode(void * parameter)
 {
  DEBUGSR_PRINT("FFT started on core: "); DEBUGSR_PRINTLN(xPortGetCoreID());
-
+#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
  // allocate FFT buffers on first call
-  if (vReal == nullptr) vReal = (float*) calloc(samplesFFT, sizeof(float));
-  if (vImag == nullptr) vImag = (float*) calloc(samplesFFT, sizeof(float));
-  if ((vReal == nullptr) || (vImag == nullptr)) {
+  if (valFFT == nullptr) valFFT = (float*) calloc(samplesFFT, sizeof(float));
+  if (vImag == nullptr)  vImag  = (float*) calloc(samplesFFT, sizeof(float));
+  if ((valFFT == nullptr) || (vImag == nullptr)) {
    // something went wrong
-    if (vReal) free(vReal); vReal = nullptr;
+    if (valFFT) free(valFFT); valFFT = nullptr;
    if (vImag) free(vImag); vImag = nullptr;
    return;
  }
  // Create FFT object with weighing factor storage
-  ArduinoFFT<float> FFT = ArduinoFFT<float>( vReal, vImag, samplesFFT, SAMPLE_RATE, true);
+  ArduinoFFT<float> FFT = ArduinoFFT<float>(valFFT, vImag, samplesFFT, SAMPLE_RATE, true);
+#elif !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+  // allocate and initialize FFT buffers on first call
+  // note: free() is never used on these pointers. If it ever is implemented, this implementation can cause memory leaks (need to free raw pointers)
+  if (valFFT == nullptr) {
+    float* raw_buffer = (float*)heap_caps_malloc((2 * samplesFFT * sizeof(float)) + 16, MALLOC_CAP_8BIT);
+    if ((raw_buffer == nullptr)) return; // something went wrong
+    valFFT = (float*)(((uintptr_t)raw_buffer + 15) & ~15);  // SIMD requires aligned memory to 16-byte boundary. note in IDF5 there is MALLOC_CAP_SIMD available
+  }
+  // create window
+  if (windowFFT == nullptr) {
+    float* raw_buffer = (float*)heap_caps_malloc((samplesFFT * sizeof(float)) + 16, MALLOC_CAP_8BIT);
+    if ((raw_buffer == nullptr)) return; // something went wrong
+    windowFFT = (float*)(((uintptr_t)raw_buffer + 15) & ~15);  // SIMD requires aligned memory to 16-byte boundary
+  }
+  if (dsps_fft2r_init_fc32(NULL, samplesFFT) != ESP_OK) return; // initialize FFT tables
+  // create window function for FFT
+#ifdef FFT_PREFER_EXACT_PEAKS
+  dsps_wind_blackman_harris_f32(windowFFT, samplesFFT);
+#else
+  dsps_wind_flat_top_f32(windowFFT, samplesFFT);
+#endif
+#else
+  // allocate and initialize integer FFT buffers on first call
+  if (valFFT == nullptr) valFFT = (int16_t*) calloc(sizeof(int16_t), samplesFFT * 2);
+  if ((valFFT == nullptr)) return; // something went wrong
+  // create window
+  if (windowFFT == nullptr) windowFFT = (int16_t*) calloc(sizeof(int16_t), samplesFFT);
+  if ((windowFFT == nullptr)) return; // something went wrong
+  if (dsps_fft2r_init_sc16(NULL, samplesFFT) != ESP_OK) return; // initialize FFT tables
+  // create window function for FFT
+  float *windowFloat = (float*) calloc(sizeof(float), samplesFFT); // temporary buffer for window function
+  if ((windowFloat == nullptr)) return; // something went wrong
+#ifdef FFT_PREFER_EXACT_PEAKS
+  dsps_wind_blackman_harris_f32(windowFloat, samplesFFT);
+#else
+  dsps_wind_flat_top_f32(windowFloat, samplesFFT);
+#endif
+  // convert float window to 16-bit int
+  for (int i = 0; i < samplesFFT; i++) {
+    windowFFT[i] = (int16_t)(windowFloat[i] * 32767.0f);
+  }
+  free(windowFloat); // free temporary buffer
+#endif

  // see https://www.freertos.org/vtaskdelayuntil.html
  const TickType_t xFrequency = FFT_MIN_CYCLE * portTICK_PERIOD_MS;  
@@ -251,8 +358,7 @@ void FFTcode(void * parameter)
 #endif

    // get a fresh batch of samples from I2S
-    if (audioSource) audioSource->getSamples(vReal, samplesFFT);
-    memset(vImag, 0, samplesFFT * sizeof(float));   // set imaginary parts to 0
+    if (audioSource) audioSource->getSamples(valFFT, samplesFFT); // note: valFFT is used as a int16_t buffer on C3 and S2, could optimize RAM use by only allocating half the size (but makes code harder to read)

 #if defined(WLED_DEBUG) || defined(SR_DEBUG)
    if (start < esp_timer_get_time()) { // filter out overflows
@@ -264,16 +370,15 @@ void FFTcode(void * parameter)

    xLastWakeTime = xTaskGetTickCount();       // update "last unblocked time" for vTaskDelay

-    // band pass filter - can reduce noise floor by a factor of 50
+    // band pass filter - can reduce noise floor by a factor of 50 and avoid aliasing effects to base & high frequency bands
    // downside: frequencies below 100Hz will be ignored
-    if (useBandPassFilter) runMicFilter(samplesFFT, vReal);
-
+    if (useMicFilter) runMicFilter(samplesFFT, valFFT);
    // find highest sample in the batch
-    float maxSample = 0.0f;                         // max sample from FFT batch
+    FFTsampleType maxSample = 0;                         // max sample from FFT batch
    for (int i=0; i < samplesFFT; i++) {
 	    // pick our  our current mic sample - we take the max value from all samples that go into FFT
-	    if ((vReal[i] <= (INT16_MAX - 1024)) && (vReal[i] >= (INT16_MIN + 1024)))  //skip extreme values - normally these are artefacts
-        if (fabsf((float)vReal[i]) > maxSample) maxSample = fabsf((float)vReal[i]);
+	    if ((valFFT[i] <= (INT16_MAX - 1024)) && (valFFT[i] >= (INT16_MIN + 1024)))  //skip extreme values - normally these are artefacts
+        if (FFTabs(valFFT[i]) > maxSample) maxSample = FFTabs(valFFT[i]);
    }
    // release highest sample to volume reactive effects early - not strictly necessary here - could also be done at the end of the function
    // early release allows the filters (getSample() and agcAvg()) to work with fresh values - we will have matching gain and noise gate values when we want to process the FFT results.
@@ -285,32 +390,97 @@ void FFTcode(void * parameter)
    if (sampleAvg > 0.25f) { // noise gate open means that FFT results will be used. Don't run FFT if results are not needed.
 #endif

-      // run FFT (takes 3-5ms on ESP32, ~12ms on ESP32-S2)
+#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
+      // run Arduino FFT (takes 3-5ms on ESP32, ~12ms on ESP32-S2, ~20ms on ESP32-C3)
+      memset(vImag, 0, samplesFFT * sizeof(float));               // set imaginary parts to 0
      FFT.dcRemoval();                                            // remove DC offset
+#ifdef FFT_PREFER_EXACT_PEAKS
+      FFT.windowing(FFTWindow::Blackman_Harris, FFTDirection::Forward);  // Weigh data using "Blackman- Harris" window - sharp peaks due to excellent sideband rejection
+#else
      FFT.windowing( FFTWindow::Flat_top, FFTDirection::Forward); // Weigh data using "Flat Top" function - better amplitude accuracy
-      //FFT.windowing(FFTWindow::Blackman_Harris, FFTDirection::Forward);  // Weigh data using "Blackman- Harris" window - sharp peaks due to excellent sideband rejection
+#endif
      FFT.compute( FFTDirection::Forward );                       // Compute FFT
      FFT.complexToMagnitude();                                   // Compute magnitudes
-      vReal[0] = 0;   // The remaining DC offset on the signal produces a strong spike on position 0 that should be eliminated to avoid issues.
-
-      FFT.majorPeak(&FFT_MajorPeak, &FFT_Magnitude);                // let the effects know which freq was most dominant
+      valFFT[0] = 0;   // The remaining DC offset on the signal produces a strong spike on position 0 that should be eliminated to avoid issues.
+      FFT.majorPeak(&FFT_MajorPeak, &FFT_Magnitude);              // let the effects know which freq was most dominant
+      // note: scaling is done in fftAddAvg(), so we don't scale here
+#else
+      // run float DSP FFT (takes ~x ms on ESP32, ~x ms on ESP32-S2, ~x ms on ESP32-C3) TODO: test and fill in these values
+      // remove DC offset
+      FFTmathType sum = 0;
+      for (int i = 0; i < samplesFFT; i++) sum += valFFT[i];
+      FFTmathType mean = sum / (FFTmathType)samplesFFT;
+      for (int i = 0; i < samplesFFT; i++) valFFT[i] -= mean;
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+      // apply window function to samples and fill buffer with interleaved complex values [Re,Im,Re,Im,...]
+      for (int i = samplesFFT - 1; i >= 0 ; i--) {
+        // fill the buffer back to front to avoid overwriting samples
+        float windowed_sample = valFFT[i] * windowFFT[i];
+        valFFT[i * 2] = windowed_sample;
+        valFFT[i * 2 + 1] = 0.0f; // set imaginary part to zero
+      }
+#ifdef CONFIG_IDF_TARGET_ESP32S3
+      dsps_fft2r_fc32_aes3(valFFT, samplesFFT); // ESP32 S3 optimized version of FFT
+#elif defined(CONFIG_IDF_TARGET_ESP32)
+      dsps_fft2r_fc32_ae32(valFFT, samplesFFT); // ESP32 optimized version of FFT
+#else
+      dsps_fft2r_fc32_ansi(valFFT, samplesFFT); // perform FFT using ANSI C implementation
+#endif
+      dsps_bit_rev_fc32(valFFT, samplesFFT);    // bit reverse i.e. "unshuffle" the results
+      valFFT[0] = 0;  // set DC bin to 0, as it is not needed and can cause issues
+      // convert to magnitude & find FFT_MajorPeak and FFT_Magnitude; magnitudes are written in place to valFFT[1 .. samplesFFT_2-1]
+      FFT_MajorPeak = 0;
+      FFT_Magnitude = 0;
+      for (int i = 1; i < samplesFFT_2; i++) {  // skip [0] as it is DC offset
+        float real_part = valFFT[i * 2];
+        float imag_part = valFFT[i * 2 + 1];
+        valFFT[i] = sqrtf(real_part * real_part + imag_part * imag_part); // safe in place: write index i is always below read index 2*i
+        if (valFFT[i] > FFT_Magnitude) {
+          FFT_Magnitude = valFFT[i];
+          FFT_MajorPeak = i * (float(SAMPLE_RATE) / float(samplesFFT)); // bin center frequency; float division avoids truncating the bin width if SAMPLE_RATE is an integer
+        }
+        // note: scaling is done in fftAddAvg(), so we don't scale here
+      }
+#else
+      // run integer DSP FFT (takes ~x ms on ESP32, ~x ms on ESP32-S2, ~1.5 ms on ESP32-C3) TODO: test and fill in these values
+      // apply window function to samples and fill buffer with interleaved complex values [Re,Im,Re,Im,...]
+      for (int i = samplesFFT - 1; i >= 0 ; i--) {
+        // fill the buffer back to front to avoid overwriting samples
+        int16_t windowed_sample = ((int32_t)valFFT[i] * (int32_t)windowFFT[i]) >> 15; // both values are ±15bit
+        valFFT[i * 2] = windowed_sample;
+        valFFT[i * 2 + 1] = 0; // set imaginary part to zero
+      }
+      dsps_fft2r_sc16_ansi(valFFT, samplesFFT); // perform FFT on complex value pairs (Re,Im)
+      dsps_bit_rev_sc16_ansi(valFFT, samplesFFT);    // bit reverse i.e. "unshuffle" the results
+      valFFT[0] = 0; // set DC bin to 0, as it is not needed and can cause issues
+      // convert to magnitude, FFT returns interleaved complex values [Re,Im,Re,Im,...]
+      int FFT_MajorPeak_int = 0;
+      int FFT_Magnitude_int = 0;
+      for (int i = 1; i < samplesFFT_2; i++) { // skip [0], it is DC offset
+        int32_t real_part = valFFT[i * 2];
+        int32_t imag_part = valFFT[i * 2 + 1];
+        valFFT[i] = sqrt32_bw(real_part * real_part + imag_part * imag_part); // note: this should never overflow as Re and Im form a vector of maximum length 32767
+        if (valFFT[i] > FFT_Magnitude_int) {
+          FFT_Magnitude_int = valFFT[i];
+          FFT_MajorPeak_int = ((i * SAMPLE_RATE)/samplesFFT);
+        }
+        // note: scaling is done in fftAddAvg(), so we don't scale here
+      }
+      FFT_MajorPeak = FFT_MajorPeak_int;
+      // scale to match raw float value (factor 512, see fftAddAvg()); previously this result was overwritten by the unscaled value
+      FFT_Magnitude = FFT_Magnitude_int * 512;
+#endif
+#endif
      FFT_MajorPeak = constrain(FFT_MajorPeak, 1.0f, 11025.0f);   // restrict value to range expected by effects
-
 #if defined(WLED_DEBUG) || defined(SR_DEBUG)
      haveDoneFFT = true;
 #endif
-
-    } else { // noise gate closed - only clear results as FFT was skipped. MIC samples are still valid when we do this.
-      memset(vReal, 0, samplesFFT * sizeof(float));
+    } else { // noise gate closed - only clear results as FFT was skipped. MIC samples are still valid when we do this -> set all samples to 0
+      memset(valFFT, 0, samplesFFT * sizeof(FFTsampleType));
      FFT_MajorPeak = 1;
      FFT_Magnitude = 0.001;
    }

-    for (int i = 0; i < samplesFFT; i++) {
-      float t = fabsf(vReal[i]);                      // just to be sure - values in fft bins should be positive any way
-      vReal[i] = t / 16.0f;                           // Reduce magnitude. Want end result to be scaled linear and ~4096 max.
-    } // for()
-
    // mapping of FFT result bins to frequency channels
    if (fabsf(sampleAvg) > 0.5f) { // noise gate open
 #if 0
@@ -341,7 +511,7 @@ void FFTcode(void * parameter)
      fftCalc[15] = fftAddAvg(194,250);   // 3880 - 5000 // avoid the last 5 bins, which are usually inaccurate
 #else
      /* new mapping, optimized for 22050 Hz by softhack007 */
-                                                    // bins frequency  range
+      // bins frequency  range
      if (useBandPassFilter) {
        // skip frequencies below 100hz
        fftCalc[ 0] = 0.8f * fftAddAvg(3,4);
@@ -403,12 +573,15 @@ void FFTcode(void * parameter)
 // Pre / Postprocessing  //
 ///////////////////////////

-static void runMicFilter(uint16_t numSamples, float *sampleBuffer)          // pre-filtering of raw samples (band-pass)
+static void runMicFilter(uint16_t numSamples, FFTsampleType *sampleBuffer)          // pre-filtering of raw samples (band-pass)
 {
-  // low frequency cutoff parameter - see https://dsp.stackexchange.com/questions/40462/exponential-moving-average-cut-off-frequency
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+  // low frequency cutoff parameter - see https://dsp.stackexchange.com/questions/40462/exponential-moving-average-cut-off-frequency (alpha = 2π × fc / fs)
  //constexpr float alpha = 0.04f;   // 150Hz
  //constexpr float alpha = 0.03f;   // 110Hz
-  constexpr float alpha = 0.0225f; // 80hz
+  //constexpr float alpha = 0.0285f; //100Hz
+  constexpr float alpha = 0.0256f; //90Hz
+  //constexpr float alpha = 0.0225f; // 80hz
  //constexpr float alpha = 0.01693f;// 60hz
  // high frequency cutoff  parameter
  //constexpr float beta1 = 0.75f;   // 11Khz
@@ -432,6 +605,39 @@ static void runMicFilter(uint16_t numSamples, float *sampleBuffer)          // p
        lowfilt += alpha * (sampleBuffer[i] - lowfilt);
        sampleBuffer[i] = sampleBuffer[i] - lowfilt;
  }
+#else
+  // low frequency cutoff parameter 17.15 fixed point format (alpha * 2^15)
+  //constexpr int32_t ALPHA_FP = 1311;    // 0.04f * (1<<15) (150Hz)
+  //constexpr int32_t ALPHA_FP = 983;     // 0.03f * (1<<15) (110Hz)
+  //constexpr int32_t ALPHA_FP = 934;     // 0.0285f * (1<<15) (100Hz)
+  constexpr int32_t ALPHA_FP = 840;       // 0.0256f * (1<<15) (90Hz)
+  //constexpr int32_t ALPHA_FP = 737;     // 0.0225f * (1<<15) (80Hz)
+  //constexpr int32_t ALPHA_FP = 555;     // 0.01693f * (1<<15) (60Hz)
+
+  // high frequency cutoff parameters 16.16 fixed point format (beta * 2^16)
+  //constexpr int32_t BETA1_FP = 49152;   // 0.75f * (1<<16) (11KHz)
+  //constexpr int32_t BETA1_FP = 53740;   // 0.82f * (1<<16) (15KHz)
+  //constexpr int32_t BETA1_FP = 54297;   // 0.8285f * (1<<16) (18KHz)
+  constexpr int32_t BETA1_FP = 55706;     // 0.85f * (1<<16) (20KHz)
+  constexpr int32_t BETA2_FP = (65536 - BETA1_FP) / 2;  // ((1.0f - beta1) / 2.0f) * (1<<16)
+
+  static int32_t last_vals[2] = { 0 };    // FIR high freq cutoff filter history: the two previous raw samples (plain sample units, kept across calls)
+  static int32_t lowfilt_fp = 0;          // IIR low frequency cutoff filter state (17.15 fixed point, matches ALPHA_FP; kept across calls)
+
+  for (int i = 0; i < numSamples; i++) {
+    // FIR lowpass filter to remove high frequency noise
+    int32_t highFilteredSample_fp;          // note: after the >> 16 below this holds plain sample units, not fixed point
+
+    if (i < (numSamples - 1))
+      highFilteredSample_fp = (BETA1_FP * (int32_t)sampleBuffer[i] + BETA2_FP * last_vals[0] + BETA2_FP * (int32_t)sampleBuffer[i + 1]) >> 16; // smooth out spikes
+    else
+      highFilteredSample_fp = (BETA1_FP * (int32_t)sampleBuffer[i] + BETA2_FP * last_vals[0] + BETA2_FP * last_vals[1]) >> 16; // special handling for last sample in array: no next sample available, use older history instead
+    last_vals[1] = last_vals[0];
+    last_vals[0] = (int32_t)sampleBuffer[i]; // store the raw sample before it is overwritten below
+    lowfilt_fp += ALPHA_FP * (highFilteredSample_fp - (lowfilt_fp >> 15)); // low pass filter in 17.15 fixed point format
+    sampleBuffer[i] = highFilteredSample_fp - (lowfilt_fp >> 15); // subtract the low-pass estimate -> removes DC offset and lowest frequencies
+  }
+#endif
 }

 static void postProcessFFTResults(bool noiseGateOpen, int numberOfChannels) // post-processing and post-amp of GEQ channels
@@ -520,7 +726,7 @@ static void detectSamplePeak(void) {
  // Poor man's beat detection by seeing if sample > Average + some value.
  // This goes through ALL of the 255 bins - but ignores stupid settings
  // Then we got a peak, else we don't. The peak has to time out on its own in order to support UDP sound sync.
-  if ((sampleAvg > 1) && (maxVol > 0) && (binNum > 4) && (vReal[binNum] > maxVol) && ((millis() - timeOfPeak) > 100)) {
+  if ((sampleAvg > 1) && (maxVol > 0) && (binNum > 4) && (valFFT[binNum] > maxVol) && ((millis() - timeOfPeak) > 100)) {
    havePeak = true;
  }

@@ -1165,8 +1371,8 @@ class AudioReactive : public Usermod {
        periph_module_reset(PERIPH_I2S0_MODULE);   // not possible on -C3
      #endif
      delay(100);         // Give that poor microphone some time to setup.
-
-      useBandPassFilter = false;
+      useBandPassFilter = false; // filter cuts lowest and highest frequency bands from FFT result (use on very noisy mic inputs)
+      useMicFilter = true;       // filter fixes aliasing to base & highest frequency bands and reduces noise floor (recommended for all mic inputs)

      #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
        if ((i2sckPin == I2S_PIN_NO_CHANGE) && (i2ssdPin >= 0) && (i2swsPin >= 0) && ((dmType == 1) || (dmType == 4)) ) dmType = 5;   // dummy user support: SCK == -1 --means--> PDM microphone
@@ -1201,6 +1407,7 @@ class AudioReactive : public Usermod {
        case 4:
          DEBUGSR_PRINT(F("AR: Generic I2S Microphone with Master Clock - ")); DEBUGSR_PRINTLN(F(I2S_MIC_CHANNEL_TEXT));
          audioSource = new I2SSource(SAMPLE_RATE, BLOCK_SIZE, 1.0f/24.0f);
+          useMicFilter = false; // I2S with Master Clock is mostly used for line-in, skip sample filtering
          delay(100);
          if (audioSource) audioSource->initialize(i2swsPin, i2ssdPin, i2sckPin, mclkPin);
          break;
@@ -1216,6 +1423,7 @@ class AudioReactive : public Usermod {
        case 6:
          DEBUGSR_PRINTLN(F("AR: ES8388 Source"));
          audioSource = new ES8388Source(SAMPLE_RATE, BLOCK_SIZE);
+          useMicFilter = false;
          delay(100);
          if (audioSource) audioSource->initialize(i2swsPin, i2ssdPin, i2sckPin, mclkPin);
          break;
--- a/usermods/audioreactive/audio_source.h
+++ b/usermods/audioreactive/audio_source.h
@@ -22,7 +22,7 @@

 // see https://docs.espressif.com/projects/esp-idf/en/latest/esp32s3/hw-reference/chip-series-comparison.html#related-documents
 // and https://docs.espressif.com/projects/esp-idf/en/latest/esp32s3/api-reference/peripherals/i2s.html#overview-of-all-modes
-#if defined(CONFIG_IDF_TARGET_ESP32C2) || defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C5) || defined(CONFIG_IDF_TARGET_ESP32C6) || defined(CONFIG_IDF_TARGET_ESP32H2) || defined(ESP8266) || defined(ESP8265)
+#if defined(CONFIG_IDF_TARGET_ESP32C2) || defined(CONFIG_IDF_TARGET_ESP32C5) || defined(CONFIG_IDF_TARGET_ESP32C6) || defined(CONFIG_IDF_TARGET_ESP32H2) || defined(ESP8266) || defined(ESP8265)
  // there are two things in these MCUs that could lead to problems with audio processing:
  // * no floating point hardware (FPU) support - FFT uses float calculations. If done in software, a strong slow-down can be expected (between 8x and 20x)
  // * single core, so FFT task might slow down other things like LED updates
@@ -134,7 +134,7 @@ class AudioSource {
       Read num_samples from the microphone, and store them in the provided
       buffer
    */
-    virtual void getSamples(float *buffer, uint16_t num_samples) = 0;
+    virtual void getSamples(FFTsampleType *buffer, uint16_t num_samples) = 0;

    /* check if the audio source driver was initialized successfully */
    virtual bool isInitialized(void) {return(_initialized);}
@@ -316,7 +316,7 @@ class I2SSource : public AudioSource {
      if (_mclkPin != I2S_PIN_NO_CHANGE) PinManager::deallocatePin(_mclkPin, PinOwner::UM_Audioreactive);
    }

-    virtual void getSamples(float *buffer, uint16_t num_samples) {
+    virtual void getSamples(FFTsampleType *buffer, uint16_t num_samples) {
      if (_initialized) {
        esp_err_t err;
        size_t bytes_read = 0;        /* Counter variable to check if we actually got enough data */
@@ -334,19 +334,36 @@ class I2SSource : public AudioSource {
          return;
        }

-        // Store samples in sample buffer and update DC offset
-        for (int i = 0; i < num_samples; i++) {
-
-          newSamples[i] = postProcessSample(newSamples[i]);  // perform postprocessing (needed for ADC samples)
-          
-          float currSample = 0.0f;
-#ifdef I2S_SAMPLE_DOWNSCALE_TO_16BIT
-              currSample = (float) newSamples[i] / 65536.0f;      // 32bit input -> 16bit; keeping lower 16bits as decimal places
-#else
-              currSample = (float) newSamples[i];                 // 16bit input -> use as-is
+        // Store samples in sample buffer
+#if defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+        //constexpr int32_t FIXEDSHIFT = 8; // shift by 8 bits for fixed point math (no loss at 24bit input sample resolution)
+        //int32_t intSampleScale = _sampleScale * (1<<FIXEDSHIFT); // _sampleScale <= 1.0f, shift for fixed point math
 #endif
+
+        for (int i = 0; i < num_samples; i++) {
+          newSamples[i] = postProcessSample(newSamples[i]);  // perform postprocessing (needed for ADC samples)
+
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+  #ifdef I2S_SAMPLE_DOWNSCALE_TO_16BIT
+          float currSample = (float) newSamples[i] / 65536.0f;      // 32bit input -> 16bit; keeping lower 16bits as decimal places
+  #else
+          float currSample = (float) newSamples[i];                 // 16bit input -> use as-is
+  #endif
          buffer[i] = currSample;
-          buffer[i] *= _sampleScale;                              // scale samples
+          buffer[i] *= _sampleScale;                               // scale samples
+#else
+  #ifdef I2S_SAMPLE_DOWNSCALE_TO_16BIT
+          // note on sample scaling: scaling is only used for inputs with master clock and those are better suited for ESP32 or S3
+          // execution speed is critical on single core MCUs
+          //int32_t currSample = newSamples[i] >> FIXEDSHIFT;   // shift to avoid overlow in multiplication
+          //currSample = (currSample * intSampleScale) >> 16;   // scale samples, shift down to 16bit
+          int16_t currSample = newSamples[i] >> 16;           // no sample scaling, just shift down to 16bit (not scaling saves ~0.4ms on C3)
+  #else
+          //int32_t currSample = (newSamples[i] * intSampleScale) >> FIXEDSHIFT;   // scale samples, shift back down to 16bit
+          int16_t currSample = newSamples[i];                 // 16bit input -> use as-is
+  #endif
+          buffer[i] = (int16_t)currSample;
+#endif
        }
      }
    }
@@ -689,7 +706,7 @@ class I2SAdcSource : public I2SSource {
    }


-    void getSamples(float *buffer, uint16_t num_samples) {
+    void getSamples(FFTsampleType *buffer, uint16_t num_samples) {
      /* Enable ADC. This has to be enabled and disabled directly before and
       * after sampling, otherwise Wifi dies
       */