use fast path in blendSegment to bump up FPS (#5464)

* use fast path in blendSegment to bump up FPS
2026-04-22 07:02:51 +00:00 · 2026-04-16 07:45:00 +02:00
parent 35ce05a73b
commit 910caae463
1 changed files with 63 additions and 6 deletions
--- a/wled00/FX_fcn.cpp
+++ b/wled00/FX_fcn.cpp
@@ -1367,6 +1367,7 @@ static uint8_t _dummy     (uint8_t a, uint8_t b) { return a; } // dummy (same as
 void WS2812FX::blendSegment(const Segment &topSegment) const {
  typedef uint8_t(*FuncType)(uint8_t, uint8_t);
  // function pointer array: fill with _dummy if using special case: avoid OOB access and always provide a valid path
+  // note: making the function array static const uses more ram and comes at no significant speed gain
  FuncType funcs[] = {
    _dummy,      _dummy,     _dummy,    _subtract,
    _difference, _average,   _dummy,    _divide,
@@ -1398,14 +1399,71 @@ void WS2812FX::blendSegment(const Segment &topSegment) const {
  const size_t  matrixSize = Segment::maxWidth * Segment::maxHeight;
  const size_t  startIndx  = XY(topSegment.start, topSegment.startY);
  const size_t  stopIndx   = startIndx + length;
-  const unsigned progress  = topSegment.progress();
-  const unsigned progInv   = 0xFFFFU - progress;
  uint8_t       opacity    = topSegment.currentBri(); // returns transitioned opacity for style FADE
  uint8_t       cct        = topSegment.currentCCT();
  if (gammaCorrectCol) opacity = gamma8inv(opacity); // use inverse gamma on brightness for correct color scaling after gamma correction (see #5343 for details)

-  Segment::setClippingRect(0, 0);             // disable clipping by default
+  const Segment *segO = topSegment.getOldSegment();
+  const bool hasGrouping = topSegment.groupLength() != 1;

+  // fast path: handle the default case - no transitions, no grouping/spacing, no mirroring, no CCT
+  if (!segO && blendingStyle == TRANSITION_FADE && !hasGrouping && !topSegment.mirror && !topSegment.mirror_y) {
+    if (isMatrix && stopIndx <= matrixSize && !_pixelCCT) {
+#ifndef WLED_DISABLE_2D
+      // Calculate pointer steps to avoid 'if' and 'XY()' inside loops
+      int x_inc = 1;
+      int y_inc = Segment::maxWidth;
+      int start_offset = XY(topSegment.start, topSegment.startY);
+
+      // adjust starting position and steps based on Reverse/Transpose
+      // note: transpose is handled in separate loop so it is still fast and no branching is needed in default path
+      if (!topSegment.transpose) {
+        if (topSegment.reverse)   { start_offset += (width - 1); x_inc = -1; }
+        if (topSegment.reverse_y) { start_offset += (height - 1) * Segment::maxWidth; y_inc = -Segment::maxWidth; }
+
+        for (int y = 0; y < height; y++) {
+          uint32_t* pRow = &_pixels[start_offset + y * y_inc];
+          const int y_width = y * width;
+          for (int x = 0; x < width; x++) {
+            uint32_t* p = pRow + x * x_inc;
+            uint32_t c_a = topSegment.getPixelColorRaw(x + y_width);
+            *p = color_blend(*p, segblend(c_a, *p), opacity);
+          }
+        }
+      } else { // transposed
+        for (int y = 0; y < height; y++) {
+          const int px = topSegment.reverse ? (height - y - 1) : y;  // source pixel: swap y into x, reverse if needed
+          for (int x = 0; x < width; x++) {
+            const int py = topSegment.reverse_y ? (width  - x - 1) : x;  // source pixel: swap x into y, reverse if needed
+            const uint32_t c_a = topSegment.getPixelColorRaw(px + py * height); // height = virtual width
+            const size_t idx = XY(topSegment.start + x, topSegment.startY + y); // write logical (non swapped) pixel coordinate
+            _pixels[idx] = color_blend(_pixels[idx], segblend(c_a, _pixels[idx]), opacity);
+          }
+        }
+      }
+      return;
+#endif
+    } else if (!isMatrix) {
+      // 1D fast path, include CCT as it is more common on 1D setups
+      uint32_t* strip = _pixels;
+      int start = topSegment.start;
+      int off   = topSegment.offset;
+      for (int i = 0; i < length; i++) {
+        uint32_t c_a = topSegment.getPixelColorRaw(i);
+        int p = topSegment.reverse ? (length - i - 1) : i;
+        int idx = start + p + off;
+        if (idx >= topSegment.stop) idx -= length;
+        strip[idx] = color_blend(strip[idx], segblend(c_a, strip[idx]), opacity);
+        if (_pixelCCT) _pixelCCT[idx] = cct;
+      }
+      return;
+    }
+  }
+
+  // slow path: handle transitions, grouping/spacing, segments with clipping and CCT pixels
+  Segment::setClippingRect(0, 0);  // disable clipping by default
+  const unsigned progress = topSegment.progress();
+  const unsigned progInv  = 0xFFFFU - progress;
  const unsigned dw = (blendingStyle==TRANSITION_OUTSIDE_IN ? progInv : progress) * width / 0xFFFFU + 1;
  const unsigned dh = (blendingStyle==TRANSITION_OUTSIDE_IN ? progInv : progress) * height / 0xFFFFU + 1;
  const unsigned orgBS = blendingStyle;
@@ -1466,7 +1524,6 @@ void WS2812FX::blendSegment(const Segment &topSegment) const {
 #ifndef WLED_DISABLE_2D
    const int nCols = topSegment.virtualWidth();
    const int nRows = topSegment.virtualHeight();
-    const Segment *segO = topSegment.getOldSegment();
    const int oCols = segO ? segO->virtualWidth() : nCols;
    const int oRows = segO ? segO->virtualHeight() : nRows;

@@ -1562,8 +1619,8 @@ void WS2812FX::blendSegment(const Segment &topSegment) const {
    }
 #endif
  } else {
+    // 1D Slow Path
    const int nLen = topSegment.virtualLength();
-    const Segment *segO = topSegment.getOldSegment();
    const int oLen = segO ? segO->virtualLength() : nLen;

    const auto setMirroredPixel = [&](int i, uint32_t c, uint8_t o) {
@@ -1643,7 +1700,7 @@ void WS2812FX::show() {

  if (realtimeMode == REALTIME_MODE_INACTIVE || useMainSegmentOnly || realtimeOverride > REALTIME_OVERRIDE_NONE) {
    // clear frame buffer
-    for (size_t i = 0; i < totalLen; i++) _pixels[i] = BLACK; // memset(_pixels, 0, sizeof(uint32_t) * getLengthTotal());
+    memset(_pixels, 0, sizeof(uint32_t) * totalLen);
    // blend all segments into (cleared) buffer
    for (Segment &seg : _segments) if (seg.isActive() && (seg.on || seg.isInTransition())) {
      blendSegment(seg);              // blend segment's buffer into frame buffer