cleanup and improved color_add()

- optimized color_add() again: now it is as fast with preserved ratio scaling than the "fast" variant was before (if no scaling is needed, it is even faster). plus it saves 250 bytes of flash
- bugfix in `color_fade()`
- removed a lot of whitespaces
This commit is contained in:
Damian Schneider 2024-09-18 22:10:27 +02:00
parent 686866c6f9
commit 696290527a
5 changed files with 67 additions and 71 deletions

View File

@ -595,9 +595,9 @@ typedef struct Segment {
void fadeToBlackBy(uint8_t fadeBy);
inline void blendPixelColor(int n, uint32_t color, uint8_t blend) { setPixelColor(n, color_blend(getPixelColor(n), color, blend)); }
inline void blendPixelColor(int n, CRGB c, uint8_t blend) { blendPixelColor(n, RGBW32(c.r,c.g,c.b,0), blend); }
inline void addPixelColor(int n, uint32_t color, bool fast = false) { setPixelColor(n, color_add(getPixelColor(n), color, fast)); }
inline void addPixelColor(int n, byte r, byte g, byte b, byte w = 0, bool fast = false) { addPixelColor(n, RGBW32(r,g,b,w), fast); }
inline void addPixelColor(int n, CRGB c, bool fast = false) { addPixelColor(n, RGBW32(c.r,c.g,c.b,0), fast); }
inline void addPixelColor(int n, uint32_t color) { setPixelColor(n, color_add(getPixelColor(n), color)); }
inline void addPixelColor(int n, byte r, byte g, byte b, byte w = 0) { addPixelColor(n, RGBW32(r,g,b,w)); }
inline void addPixelColor(int n, CRGB c) { addPixelColor(n, RGBW32(c.r,c.g,c.b,0)); }
inline void fadePixelColor(uint16_t n, uint8_t fade) { setPixelColor(n, color_fade(getPixelColor(n), fade, true)); }
[[gnu::hot]] uint32_t color_from_palette(uint16_t, bool mapping, bool wrap, uint8_t mcol, uint8_t pbri = 255) const;
[[gnu::hot]] uint32_t color_wheel(uint8_t pos) const;
@ -605,11 +605,11 @@ typedef struct Segment {
// 2D Blur: shortcuts for bluring columns or rows only (50% faster than full 2D blur)
inline void blurCols(fract8 blur_amount, bool smear = false) { // blur all columns
const unsigned cols = virtualWidth();
for (unsigned k = 0; k < cols; k++) blurCol(k, blur_amount, smear);
for (unsigned k = 0; k < cols; k++) blurCol(k, blur_amount, smear);
}
inline void blurRows(fract8 blur_amount, bool smear = false) { // blur all rows
const unsigned rows = virtualHeight();
for ( unsigned i = 0; i < rows; i++) blurRow(i, blur_amount, smear);
for ( unsigned i = 0; i < rows; i++) blurRow(i, blur_amount, smear);
}
// 2D matrix
@ -632,10 +632,10 @@ typedef struct Segment {
// 2D support functions
inline void blendPixelColorXY(uint16_t x, uint16_t y, uint32_t color, uint8_t blend) { setPixelColorXY(x, y, color_blend(getPixelColorXY(x,y), color, blend)); }
inline void blendPixelColorXY(uint16_t x, uint16_t y, CRGB c, uint8_t blend) { blendPixelColorXY(x, y, RGBW32(c.r,c.g,c.b,0), blend); }
inline void addPixelColorXY(int x, int y, uint32_t color, bool fast = false) { setPixelColorXY(x, y, color_add(getPixelColorXY(x,y), color, fast)); }
inline void addPixelColorXY(int x, int y, byte r, byte g, byte b, byte w = 0, bool fast = false) { addPixelColorXY(x, y, RGBW32(r,g,b,w), fast); }
inline void addPixelColorXY(int x, int y, CRGB c, bool fast = false) { addPixelColorXY(x, y, RGBW32(c.r,c.g,c.b,0), fast); }
inline void fadePixelColorXY(uint16_t x, uint16_t y, uint8_t fade) { setPixelColorXY(x, y, color_fade(getPixelColorXY(x,y), fade, true)); }
inline void addPixelColorXY(int x, int y, uint32_t color) { setPixelColorXY(x, y, color_add(getPixelColorXY(x,y), color)); }
inline void addPixelColorXY(int x, int y, byte r, byte g, byte b, byte w = 0) { addPixelColorXY(x, y, RGBW32(r,g,b,w)); }
inline void addPixelColorXY(int x, int y, CRGB c) { addPixelColorXY(x, y, RGBW32(c.r,c.g,c.b,0)); }
inline void fadePixelColorXY(uint16_t x, uint16_t y, uint8_t fade) { setPixelColorXY(x, y, color_fade(getPixelColorXY(x,y), fade, true)); }
void box_blur(unsigned r = 1U, bool smear = false); // 2D box blur
void blur2D(uint8_t blur_amount, bool smear = false);
void blurRow(uint32_t row, fract8 blur_amount, bool smear = false);
@ -670,9 +670,9 @@ typedef struct Segment {
inline uint32_t getPixelColorXY(int x, int y) { return getPixelColor(x); }
inline void blendPixelColorXY(uint16_t x, uint16_t y, uint32_t c, uint8_t blend) { blendPixelColor(x, c, blend); }
inline void blendPixelColorXY(uint16_t x, uint16_t y, CRGB c, uint8_t blend) { blendPixelColor(x, RGBW32(c.r,c.g,c.b,0), blend); }
inline void addPixelColorXY(int x, int y, uint32_t color, bool fast = false) { addPixelColor(x, color, fast); }
inline void addPixelColorXY(int x, int y, byte r, byte g, byte b, byte w = 0, bool fast = false) { addPixelColor(x, RGBW32(r,g,b,w), fast); }
inline void addPixelColorXY(int x, int y, CRGB c, bool fast = false) { addPixelColor(x, RGBW32(c.r,c.g,c.b,0), fast); }
inline void addPixelColorXY(int x, int y, uint32_t color) { addPixelColor(x, color); }
inline void addPixelColorXY(int x, int y, byte r, byte g, byte b, byte w = 0) { addPixelColor(x, RGBW32(r,g,b,w)); }
inline void addPixelColorXY(int x, int y, CRGB c) { addPixelColor(x, RGBW32(c.r,c.g,c.b,0)); }
inline void fadePixelColorXY(uint16_t x, uint16_t y, uint8_t fade) { fadePixelColor(x, fade); }
inline void box_blur(unsigned i, bool vertical, fract8 blur_amount) {}
inline void blur2D(uint8_t blur_amount, bool smear = false) {}

View File

@ -173,7 +173,7 @@ void IRAM_ATTR_YN Segment::setPixelColorXY(int x, int y, uint32_t col)
if (!isActive()) return; // not active
if (x >= virtualWidth() || y >= virtualHeight() || x<0 || y<0) return; // if pixel would fall out of virtual segment just exit
uint8_t _bri_t = currentBri();
uint8_t _bri_t = currentBri();
if (_bri_t < 255) {
col = color_fade(col, _bri_t);
}
@ -185,11 +185,11 @@ void IRAM_ATTR_YN Segment::setPixelColorXY(int x, int y, uint32_t col)
y *= groupLength(); // expand to physical pixels
int W = width();
int H = height();
int yY = y;
for (int j = 0; j < grouping; j++) { // groupping vertically
if(yY >= H) break;
int xX = x;
int xX = x;
for (int g = 0; g < grouping; g++) { // groupping horizontally
if (xX >= W) continue; // we have reached one dimension's end
#ifndef WLED_DISABLE_MODE_BLEND
@ -293,8 +293,8 @@ void Segment::blurRow(uint32_t row, fract8 blur_amount, bool smear){
curnew = color_fade(cur, keep);
if (x > 0) {
if (carryover)
curnew = color_add(curnew, carryover, true);
uint32_t prev = color_add(lastnew, part, true);
curnew = color_add(curnew, carryover);
uint32_t prev = color_add(lastnew, part);
if (last != prev) // optimization: only set pixel if color has changed
setPixelColorXY(x - 1, row, prev);
} else // first pixel
@ -326,15 +326,15 @@ void Segment::blurCol(uint32_t col, fract8 blur_amount, bool smear) {
curnew = color_fade(cur, keep);
if (y > 0) {
if (carryover)
curnew = color_add(curnew, carryover, true);
uint32_t prev = color_add(lastnew, part, true);
curnew = color_add(curnew, carryover);
uint32_t prev = color_add(lastnew, part);
if (last != prev) // optimization: only set pixel if color has changed
setPixelColorXY(col, y - 1, prev);
} else // first pixel
setPixelColorXY(col, y, curnew);
lastnew = curnew;
last = cur; //save original value for comparison on next iteration
carryover = part;
carryover = part;
}
setPixelColorXY(col, rows - 1, curnew);
}
@ -356,8 +356,8 @@ void Segment::blur2D(uint8_t blur_amount, bool smear) {
uint32_t part = color_fade(cur, seep);
curnew = color_fade(cur, keep);
if (x > 0) {
if (carryover) curnew = color_add(curnew, carryover, true);
uint32_t prev = color_add(lastnew, part, true);
if (carryover) curnew = color_add(curnew, carryover);
uint32_t prev = color_add(lastnew, part);
// optimization: only set pixel if color has changed
if (last != prev) setPixelColorXY(x - 1, row, prev);
} else setPixelColorXY(x, row, curnew); // first pixel
@ -375,14 +375,14 @@ void Segment::blur2D(uint8_t blur_amount, bool smear) {
uint32_t part = color_fade(cur, seep);
curnew = color_fade(cur, keep);
if (y > 0) {
if (carryover) curnew = color_add(curnew, carryover, true);
uint32_t prev = color_add(lastnew, part, true);
if (carryover) curnew = color_add(curnew, carryover);
uint32_t prev = color_add(lastnew, part);
// optimization: only set pixel if color has changed
if (last != prev) setPixelColorXY(col, y - 1, prev);
} else setPixelColorXY(col, y, curnew); // first pixel
lastnew = curnew;
last = cur; //save original value for comparison on next iteration
carryover = part;
carryover = part;
}
setPixelColorXY(col, rows - 1, curnew);
}

View File

@ -712,12 +712,12 @@ void IRAM_ATTR_YN Segment::setPixelColor(int i, uint32_t col)
{
if (!isActive()) return; // not active
#ifndef WLED_DISABLE_2D
int vStrip;
int vStrip;
#endif
if (i >= virtualLength() || i<0) // pixel would fall out of segment, check if this is a virtual strip NOTE: this is almost always false if not virtual strip, saves the calculation on 'standard' call
{
#ifndef WLED_DISABLE_2D
vStrip = i>>16; // hack to allow running on virtual strips (2D segment columns/rows)
vStrip = i>>16; // hack to allow running on virtual strips (2D segment columns/rows)
#endif
i &= 0xFFFF; //truncate vstrip index
if (i >= virtualLength() || i<0) return; // if pixel would still fall out of segment just exit
@ -735,7 +735,7 @@ void IRAM_ATTR_YN Segment::setPixelColor(int i, uint32_t col)
case M12_pBar:
// expand 1D effect vertically or have it play on virtual strips
if (vStrip>0) setPixelColorXY(vStrip - 1, vH - i - 1, col);
else for (int x = 0; x < vW; x++) setPixelColorXY(x, vH - i - 1, col);
else for (int x = 0; x < vW; x++) setPixelColorXY(x, vH - i - 1, col);
break;
case M12_pArc:
// expand in circular fashion from center
@ -796,7 +796,7 @@ void IRAM_ATTR_YN Segment::setPixelColor(int i, uint32_t col)
// Odd rays start further from center if prevRay started at center.
static int prevRay = INT_MIN; // previous ray number
if ((i % 2 == 1) && (i - 1 == prevRay || i + 1 == prevRay)) {
int jump = min(vW/3, vH/3); // can add 2 if using medium pinwheel
int jump = min(vW/3, vH/3); // can add 2 if using medium pinwheel
posx += inc_x * jump;
posy += inc_y * jump;
}
@ -1145,8 +1145,8 @@ void Segment::blur(uint8_t blur_amount, bool smear) {
uint32_t part = color_fade(cur, seep);
curnew = color_fade(cur, keep);
if (i > 0) {
if (carryover) curnew = color_add(curnew, carryover, true);
uint32_t prev = color_add(lastnew, part, true);
if (carryover) curnew = color_add(curnew, carryover);
uint32_t prev = color_add(lastnew, part);
// optimization: only set pixel if color has changed
if (last != prev) setPixelColor(i - 1, prev);
} else // first pixel
@ -1188,7 +1188,7 @@ uint32_t Segment::color_wheel(uint8_t pos) const {
* @returns Single color from palette
*/
uint32_t Segment::color_from_palette(uint16_t i, bool mapping, bool wrap, uint8_t mcol, uint8_t pbri) const {
uint32_t color = currentColor(mcol);
// default palette or no RGB support on segment
if ((palette == 0 && mcol < NUM_COLORS) || !_isRGB) {
@ -1196,7 +1196,7 @@ uint32_t Segment::color_from_palette(uint16_t i, bool mapping, bool wrap, uint8_
return (pbri == 255) ? color : color_fade(color, pbri, true);
}
uint8_t paletteIndex = i;
unsigned paletteIndex = i;
if (mapping && virtualLength() > 1) paletteIndex = (i*255)/(virtualLength() -1);
// paletteBlend: 0 - wrap when moving, 1 - always wrap, 2 - never wrap, 3 - none (undefined)
if (!wrap && strip.paletteBlend != 3) paletteIndex = scale8(paletteIndex, 240); //cut off blend at palette "end"

View File

@ -33,33 +33,32 @@ uint32_t color_blend(uint32_t color1, uint32_t color2, uint16_t blend, bool b16)
/*
* color add function that preserves ratio
* idea: https://github.com/Aircoookie/WLED/pull/2465 by https://github.com/Proto-molecule
* original idea: https://github.com/Aircoookie/WLED/pull/2465 by https://github.com/Proto-molecule
* heavily optimized for speed by @dedehai
*/
uint32_t color_add(uint32_t c1, uint32_t c2, bool fast)
uint32_t color_add(uint32_t c1, uint32_t c2)
{
if (c1 == BLACK) return c2;
if (c2 == BLACK) return c1;
uint32_t rb = (c1 & 0x00FF00FF) + (c2 & 0x00FF00FF);
uint32_t r = rb >> 16;
uint32_t b = rb & 0xFFFF;
uint32_t wg = ((c1>>8) & 0x00FF00FF) + ((c2>>8) & 0x00FF00FF);
uint32_t rb = (c1 & 0x00FF00FF) + (c2 & 0x00FF00FF); // mask and add two colors at once
uint32_t wg = ((c1>>8) & 0x00FF00FF) + ((c2>>8) & 0x00FF00FF);
uint32_t r = rb >> 16; // extract single color values
uint32_t b = rb & 0xFFFF;
uint32_t w = wg >> 16;
uint32_t g = wg & 0xFFFF;
uint32_t g = wg & 0xFFFF;
if (fast) {
r = r > 255 ? 255 : r;
g = g > 255 ? 255 : g;
b = b > 255 ? 255 : b;
w = w > 255 ? 255 : w;
return RGBW32(r,g,b,w);
} else {
unsigned max = r;
max = g > max ? g : max;
max = b > max ? b : max;
max = w > max ? w : max;
if (max < 256) return RGBW32(r, g, b, w);
else return RGBW32(r * 255 / max, g * 255 / max, b * 255 / max, w * 255 / max);
unsigned max = r; // check for overflow note: not checking and just topping out at 255 (formerly 'fast') is not any faster (but even slower if not overflowing)
max = g > max ? g : max;
max = b > max ? b : max;
max = w > max ? w : max;
if (max > 255) {
uint32_t scale = (uint32_t(255)<<8) / max; // division of two 8bit (shifted) values does not work -> use bit shifts and multiplaction instead
rb = ((rb * scale) >> 8) & 0x00FF00FF; //
wg = (wg * scale) & 0xFF00FF00;
}
else wg = wg << 8; //shift white and green back to correct position
return rb | wg;
}
/*
@ -70,52 +69,49 @@ uint32_t color_add(uint32_t c1, uint32_t c2, bool fast)
uint32_t color_fade(uint32_t c1, uint8_t amount, bool video)
{
if (c1 == BLACK || amount == 0) return BLACK;
if (amount == 255) return c1;
if (amount == 255) return c1;
uint32_t scaledcolor; // color order is: W R G B from MSB to LSB
uint32_t scale = amount; // 32bit for faster calculation
uint32_t addRemains = 0;
if (!video) amount++; // add one for correct scaling using bitshifts
if (!video) scale++; // add one for correct scaling using bitshifts
else { // video scaling: make sure colors do not dim to zero if they started non-zero
addRemains = R(c1) ? 0x00010000 : 0;
addRemains = R(c1) ? 0x00010000 : 0;
addRemains |= G(c1) ? 0x00000100 : 0;
addRemains |= B(c1) ? 0x00000001 : 0;
addRemains |= W(c1) ? 0x01000000 : 0;
}
uint32_t rb = (((c1 & 0x00FF00FF) * scale) >> 8) & 0x00FF00FF; // scale red and blue
uint32_t wg = (((c1 & 0xFF00FF00) >> 8) * scale) & 0xFF00FF00; // scale white and green
scaledcolor = (rb | wg) + addRemains;
scaledcolor = (rb | wg) + addRemains;
return scaledcolor;
}
// 1:1 replacement of fastled function optimized for ESP, slightly faster, more accurate and uses less flash (~ -200bytes)
CRGB ColorFromPaletteWLED(const CRGBPalette16& pal, unsigned index, uint8_t brightness, TBlendType blendType)
{
if ( blendType == LINEARBLEND_NOWRAP) {
//index = map8(index, 0, 239);
if (blendType == LINEARBLEND_NOWRAP) {
index = (index*240) >> 8; // Blend range is affected by lo4 blend of values, remap to avoid wrapping
}
unsigned hi4 = byte(index) >> 4;
// We then add that to a base array pointer.
const CRGB* entry = (CRGB*)( (uint8_t*)(&(pal[0])) + (hi4 * sizeof(CRGB)));
unsigned red1 = entry->r;
unsigned green1 = entry->g;
unsigned blue1 = entry->b;
unsigned blue1 = entry->b;
if(blendType != NOBLEND) {
if(hi4 == 15) entry = &(pal[0]);
else ++entry;
// unsigned red2 = entry->red;
unsigned f2 = ((index & 0x0F) << 4) + 1; // +1 so we scale by 256 as a max value, then result can just be shifted by 8
unsigned f1 = (257 - f2); // f2 is 1 minimum, so this is 256 max
red1 = (red1 * f1 + (unsigned)entry->r * f2) >> 8;
green1 = (green1 * f1 + (unsigned)entry->g * f2) >> 8;
blue1 = (blue1 * f1 + (unsigned)entry->b * f2) >> 8;
red1 = (red1 * f1 + (unsigned)entry->r * f2) >> 8;
green1 = (green1 * f1 + (unsigned)entry->g * f2) >> 8;
blue1 = (blue1 * f1 + (unsigned)entry->b * f2) >> 8;
}
if( brightness < 255) { // note: zero checking could be done to return black but that is hardly ever used so it is omitted
uint32_t scale = brightness + 1; // adjust for rounding (bitshift)
uint32_t scale = brightness + 1; // adjust for rounding (bitshift)
red1 = (red1 * scale) >> 8;
green1 = (green1 * scale) >> 8;
blue1 = (blue1 * scale) >> 8;
}
}
return CRGB((uint8_t)red1, (uint8_t)green1, (uint8_t)blue1);
}
@ -176,7 +172,7 @@ CRGBPalette16 generateHarmonicRandomPalette(CRGBPalette16 &basepalette)
harmonics[1] = basehue + 205 + random8(10);
harmonics[2] = basehue - 5 + random8(10);
break;
case 3: // square
harmonics[0] = basehue + 85 + random8(10);
harmonics[1] = basehue + 175 + random8(10);
@ -213,9 +209,9 @@ CRGBPalette16 generateHarmonicRandomPalette(CRGBPalette16 &basepalette)
//apply saturation & gamma correction
CRGB RGBpalettecolors[4];
for (int i = 0; i < 4; i++) {
if (makepastelpalette && palettecolors[i].saturation > 180) {
if (makepastelpalette && palettecolors[i].saturation > 180) {
palettecolors[i].saturation -= 160; //desaturate all four colors
}
}
RGBpalettecolors[i] = (CRGB)palettecolors[i]; //convert to RGB
RGBpalettecolors[i] = gamma32(((uint32_t)RGBpalettecolors[i]) & 0x00FFFFFFU); //strip alpha from CRGB
}

View File

@ -80,7 +80,7 @@ class NeoGammaWLEDMethod {
#define gamma32(c) NeoGammaWLEDMethod::Correct32(c)
#define gamma8(c) NeoGammaWLEDMethod::rawGamma8(c)
[[gnu::hot]] uint32_t color_blend(uint32_t,uint32_t,uint16_t,bool b16=false);
[[gnu::hot]] uint32_t color_add(uint32_t,uint32_t, bool fast=false);
[[gnu::hot]] uint32_t color_add(uint32_t,uint32_t);
[[gnu::hot]] uint32_t color_fade(uint32_t c1, uint8_t amount, bool video=false);
[[gnu::hot]] CRGB ColorFromPaletteWLED(const CRGBPalette16 &pal, unsigned index, uint8_t brightness = (uint8_t)255U, TBlendType blendType = LINEARBLEND);
CRGBPalette16 generateHarmonicRandomPalette(CRGBPalette16 &basepalette);