use fast path in blendSegment to bump up FPS (#5464)

* use fast path in blendSegment to bump up FPS
This commit is contained in:
Damian Schneider
2026-04-16 07:45:00 +02:00
committed by GitHub
parent 35ce05a73b
commit 910caae463

View File

@@ -1367,6 +1367,7 @@ static uint8_t _dummy (uint8_t a, uint8_t b) { return a; } // dummy (same as
void WS2812FX::blendSegment(const Segment &topSegment) const {
typedef uint8_t(*FuncType)(uint8_t, uint8_t);
// function pointer array: fill with _dummy if using special case: avoid OOB access and always provide a valid path
// note: making the function array static const uses more ram and comes at no significant speed gain
FuncType funcs[] = {
_dummy, _dummy, _dummy, _subtract,
_difference, _average, _dummy, _divide,
@@ -1398,14 +1399,71 @@ void WS2812FX::blendSegment(const Segment &topSegment) const {
const size_t matrixSize = Segment::maxWidth * Segment::maxHeight;
const size_t startIndx = XY(topSegment.start, topSegment.startY);
const size_t stopIndx = startIndx + length;
const unsigned progress = topSegment.progress();
const unsigned progInv = 0xFFFFU - progress;
uint8_t opacity = topSegment.currentBri(); // returns transitioned opacity for style FADE
uint8_t cct = topSegment.currentCCT();
if (gammaCorrectCol) opacity = gamma8inv(opacity); // use inverse gamma on brightness for correct color scaling after gamma correction (see #5343 for details)
Segment::setClippingRect(0, 0); // disable clipping by default
const Segment *segO = topSegment.getOldSegment();
const bool hasGrouping = topSegment.groupLength() != 1;
// fast path: handle the default case - no transitions, no grouping/spacing, no mirroring, no CCT
if (!segO && blendingStyle == TRANSITION_FADE && !hasGrouping && !topSegment.mirror && !topSegment.mirror_y) {
if (isMatrix && stopIndx <= matrixSize && !_pixelCCT) {
#ifndef WLED_DISABLE_2D
// Calculate pointer steps to avoid 'if' and 'XY()' inside loops
int x_inc = 1;
int y_inc = Segment::maxWidth;
int start_offset = XY(topSegment.start, topSegment.startY);
// adjust starting position and steps based on Reverse/Transpose
// note: transpose is handled in separate loop so it is still fast and no branching is needed in default path
if (!topSegment.transpose) {
if (topSegment.reverse) { start_offset += (width - 1); x_inc = -1; }
if (topSegment.reverse_y) { start_offset += (height - 1) * Segment::maxWidth; y_inc = -Segment::maxWidth; }
for (int y = 0; y < height; y++) {
uint32_t* pRow = &_pixels[start_offset + y * y_inc];
const int y_width = y * width;
for (int x = 0; x < width; x++) {
uint32_t* p = pRow + x * x_inc;
uint32_t c_a = topSegment.getPixelColorRaw(x + y_width);
*p = color_blend(*p, segblend(c_a, *p), opacity);
}
}
} else { // transposed
for (int y = 0; y < height; y++) {
const int px = topSegment.reverse ? (height - y - 1) : y; // source pixel: swap y into x, reverse if needed
for (int x = 0; x < width; x++) {
const int py = topSegment.reverse_y ? (width - x - 1) : x; // source pixel: swap x into y, reverse if needed
const uint32_t c_a = topSegment.getPixelColorRaw(px + py * height); // height = virtual width
const size_t idx = XY(topSegment.start + x, topSegment.startY + y); // write logical (non swapped) pixel coordinate
_pixels[idx] = color_blend(_pixels[idx], segblend(c_a, _pixels[idx]), opacity);
}
}
}
return;
#endif
} else if (!isMatrix) {
// 1D fast path, include CCT as it is more common on 1D setups
uint32_t* strip = _pixels;
int start = topSegment.start;
int off = topSegment.offset;
for (int i = 0; i < length; i++) {
uint32_t c_a = topSegment.getPixelColorRaw(i);
int p = topSegment.reverse ? (length - i - 1) : i;
int idx = start + p + off;
if (idx >= topSegment.stop) idx -= length;
strip[idx] = color_blend(strip[idx], segblend(c_a, strip[idx]), opacity);
if (_pixelCCT) _pixelCCT[idx] = cct;
}
return;
}
}
// slow path: handle transitions, grouping/spacing, segments with clipping and CCT pixels
Segment::setClippingRect(0, 0); // disable clipping by default
const unsigned progress = topSegment.progress();
const unsigned progInv = 0xFFFFU - progress;
const unsigned dw = (blendingStyle==TRANSITION_OUTSIDE_IN ? progInv : progress) * width / 0xFFFFU + 1;
const unsigned dh = (blendingStyle==TRANSITION_OUTSIDE_IN ? progInv : progress) * height / 0xFFFFU + 1;
const unsigned orgBS = blendingStyle;
@@ -1466,7 +1524,6 @@ void WS2812FX::blendSegment(const Segment &topSegment) const {
#ifndef WLED_DISABLE_2D
const int nCols = topSegment.virtualWidth();
const int nRows = topSegment.virtualHeight();
const Segment *segO = topSegment.getOldSegment();
const int oCols = segO ? segO->virtualWidth() : nCols;
const int oRows = segO ? segO->virtualHeight() : nRows;
@@ -1562,8 +1619,8 @@ void WS2812FX::blendSegment(const Segment &topSegment) const {
}
#endif
} else {
// 1D Slow Path
const int nLen = topSegment.virtualLength();
const Segment *segO = topSegment.getOldSegment();
const int oLen = segO ? segO->virtualLength() : nLen;
const auto setMirroredPixel = [&](int i, uint32_t c, uint8_t o) {
@@ -1643,7 +1700,7 @@ void WS2812FX::show() {
if (realtimeMode == REALTIME_MODE_INACTIVE || useMainSegmentOnly || realtimeOverride > REALTIME_OVERRIDE_NONE) {
// clear frame buffer
for (size_t i = 0; i < totalLen; i++) _pixels[i] = BLACK; // memset(_pixels, 0, sizeof(uint32_t) * getLengthTotal());
memset(_pixels, 0, sizeof(uint32_t) * totalLen);
// blend all segments into (cleared) buffer
for (Segment &seg : _segments) if (seg.isActive() && (seg.on || seg.isInTransition())) {
blendSegment(seg); // blend segment's buffer into frame buffer