diff --git a/wled00/FX.h b/wled00/FX.h
index 3c28274d6..bea4dbcb8 100644
--- a/wled00/FX.h
+++ b/wled00/FX.h
@@ -595,9 +595,9 @@ typedef struct Segment {
     void fadeToBlackBy(uint8_t fadeBy);
     inline void blendPixelColor(int n, uint32_t color, uint8_t blend)    { setPixelColor(n, color_blend(getPixelColor(n), color, blend)); }
     inline void blendPixelColor(int n, CRGB c, uint8_t blend)            { blendPixelColor(n, RGBW32(c.r,c.g,c.b,0), blend); }
-    inline void addPixelColor(int n, uint32_t color, bool fast = false)  { setPixelColor(n, color_add(getPixelColor(n), color, fast)); }
-    inline void addPixelColor(int n, byte r, byte g, byte b, byte w = 0, bool fast = false) { addPixelColor(n, RGBW32(r,g,b,w), fast); }
-    inline void addPixelColor(int n, CRGB c, bool fast = false)          { addPixelColor(n, RGBW32(c.r,c.g,c.b,0), fast); }
+    inline void addPixelColor(int n, uint32_t color)                     { setPixelColor(n, color_add(getPixelColor(n), color)); }
+    inline void addPixelColor(int n, byte r, byte g, byte b, byte w = 0) { addPixelColor(n, RGBW32(r,g,b,w)); }
+    inline void addPixelColor(int n, CRGB c)                             { addPixelColor(n, RGBW32(c.r,c.g,c.b,0)); }
     inline void fadePixelColor(uint16_t n, uint8_t fade)                 { setPixelColor(n, color_fade(getPixelColor(n), fade, true)); }
     [[gnu::hot]] uint32_t color_from_palette(uint16_t, bool mapping, bool wrap, uint8_t mcol, uint8_t pbri = 255) const;
     [[gnu::hot]] uint32_t color_wheel(uint8_t pos) const;
@@ -605,11 +605,11 @@ typedef struct Segment {
     // 2D Blur: shortcuts for bluring columns or rows only (50% faster than full 2D blur)
     inline void blurCols(fract8 blur_amount, bool smear = false) { // blur all columns
       const unsigned cols = virtualWidth();
-      for (unsigned k = 0; k < cols; k++) blurCol(k, blur_amount, smear); 
+      for (unsigned k = 0; k < cols; k++) blurCol(k, blur_amount, smear);
     }
     inline void blurRows(fract8 blur_amount, bool smear = false) { // blur all rows
       const unsigned rows = virtualHeight();
-      for ( unsigned i = 0; i < rows; i++) blurRow(i, blur_amount, smear); 
+      for ( unsigned i = 0; i < rows; i++) blurRow(i, blur_amount, smear);
     }
 
     // 2D matrix
@@ -632,10 +632,10 @@ typedef struct Segment {
     // 2D support functions
     inline void blendPixelColorXY(uint16_t x, uint16_t y, uint32_t color, uint8_t blend) { setPixelColorXY(x, y, color_blend(getPixelColorXY(x,y), color, blend)); }
     inline void blendPixelColorXY(uint16_t x, uint16_t y, CRGB c, uint8_t blend)         { blendPixelColorXY(x, y, RGBW32(c.r,c.g,c.b,0), blend); }
-    inline void addPixelColorXY(int x, int y, uint32_t color, bool fast = false)         { setPixelColorXY(x, y, color_add(getPixelColorXY(x,y), color, fast)); }
-    inline void addPixelColorXY(int x, int y, byte r, byte g, byte b, byte w = 0, bool fast = false) { addPixelColorXY(x, y, RGBW32(r,g,b,w), fast); }
-    inline void addPixelColorXY(int x, int y, CRGB c, bool fast = false)                             { addPixelColorXY(x, y, RGBW32(c.r,c.g,c.b,0), fast); }
-    inline void fadePixelColorXY(uint16_t x, uint16_t y, uint8_t fade)                               { setPixelColorXY(x, y, color_fade(getPixelColorXY(x,y), fade, true)); }
+    inline void addPixelColorXY(int x, int y, uint32_t color)                            { setPixelColorXY(x, y, color_add(getPixelColorXY(x,y), color)); }
+    inline void addPixelColorXY(int x, int y, byte r, byte g, byte b, byte w = 0)        { addPixelColorXY(x, y, RGBW32(r,g,b,w)); }
+    inline void addPixelColorXY(int x, int y, CRGB c)                                    { addPixelColorXY(x, y, RGBW32(c.r,c.g,c.b,0)); }
+    inline void fadePixelColorXY(uint16_t x, uint16_t y, uint8_t fade)                   { setPixelColorXY(x, y, color_fade(getPixelColorXY(x,y), fade, true)); }
     void box_blur(unsigned r = 1U, bool smear = false); // 2D box blur
     void blur2D(uint8_t blur_amount, bool smear = false);
     void blurRow(uint32_t row, fract8 blur_amount, bool smear = false);
@@ -670,9 +670,9 @@ typedef struct Segment {
     inline uint32_t getPixelColorXY(int x, int y)                                 { return getPixelColor(x); }
     inline void blendPixelColorXY(uint16_t x, uint16_t y, uint32_t c, uint8_t blend) { blendPixelColor(x, c, blend); }
     inline void blendPixelColorXY(uint16_t x, uint16_t y, CRGB c, uint8_t blend)  { blendPixelColor(x, RGBW32(c.r,c.g,c.b,0), blend); }
-    inline void addPixelColorXY(int x, int y, uint32_t color, bool fast = false)  { addPixelColor(x, color, fast); }
-    inline void addPixelColorXY(int x, int y, byte r, byte g, byte b, byte w = 0, bool fast = false) { addPixelColor(x, RGBW32(r,g,b,w), fast); }
-    inline void addPixelColorXY(int x, int y, CRGB c, bool fast = false)          { addPixelColor(x, RGBW32(c.r,c.g,c.b,0), fast); }
+    inline void addPixelColorXY(int x, int y, uint32_t color)                     { addPixelColor(x, color); }
+    inline void addPixelColorXY(int x, int y, byte r, byte g, byte b, byte w = 0) { addPixelColor(x, RGBW32(r,g,b,w)); }
+    inline void addPixelColorXY(int x, int y, CRGB c)                             { addPixelColor(x, RGBW32(c.r,c.g,c.b,0)); }
     inline void fadePixelColorXY(uint16_t x, uint16_t y, uint8_t fade)            { fadePixelColor(x, fade); }
     inline void box_blur(unsigned i, bool vertical, fract8 blur_amount) {}
     inline void blur2D(uint8_t blur_amount, bool smear = false) {}
diff --git a/wled00/FX_2Dfcn.cpp b/wled00/FX_2Dfcn.cpp
index 57ee2e5e3..10b85a82e 100644
--- a/wled00/FX_2Dfcn.cpp
+++ b/wled00/FX_2Dfcn.cpp
@@ -173,7 +173,7 @@ void IRAM_ATTR_YN Segment::setPixelColorXY(int x, int y, uint32_t col)
   if (!isActive()) return; // not active
   if (x >= virtualWidth() || y >= virtualHeight() || x<0 || y<0) return;  // if pixel would fall out of virtual segment just exit
 
-  uint8_t _bri_t = currentBri(); 
+  uint8_t _bri_t = currentBri();
   if (_bri_t < 255) {
     col = color_fade(col, _bri_t);
   }
@@ -185,11 +185,11 @@ void IRAM_ATTR_YN Segment::setPixelColorXY(int x, int y, uint32_t col)
   y *= groupLength(); // expand to physical pixels
   int W = width();
   int H = height();
- 
+
   int yY = y;
   for (int j = 0; j < grouping; j++) {   // groupping vertically
     if(yY >= H) break;
-    int xX = x;    
+    int xX = x;
     for (int g = 0; g < grouping; g++) { // groupping horizontally
       if (xX >= W) continue;  // we have reached one dimension's end
 #ifndef WLED_DISABLE_MODE_BLEND
@@ -293,8 +293,8 @@ void Segment::blurRow(uint32_t row, fract8 blur_amount, bool smear){
     curnew = color_fade(cur, keep);
     if (x > 0) {
       if (carryover)
-        curnew = color_add(curnew, carryover, true);
-      uint32_t prev = color_add(lastnew, part, true);
+        curnew = color_add(curnew, carryover);
+      uint32_t prev = color_add(lastnew, part);
       if (last != prev) // optimization: only set pixel if color has changed
         setPixelColorXY(x - 1, row, prev);
     } else // first pixel
@@ -326,15 +326,15 @@ void Segment::blurCol(uint32_t col, fract8 blur_amount, bool smear) {
     curnew = color_fade(cur, keep);
     if (y > 0) {
       if (carryover)
-        curnew = color_add(curnew, carryover, true);
-      uint32_t prev = color_add(lastnew, part, true);      
+        curnew = color_add(curnew, carryover);
+      uint32_t prev = color_add(lastnew, part);
       if (last != prev) // optimization: only set pixel if color has changed
         setPixelColorXY(col, y - 1, prev);
     } else // first pixel
       setPixelColorXY(col, y, curnew);
     lastnew = curnew;
     last = cur; //save original value for comparison on next iteration
-    carryover = part;        
+    carryover = part;
   }
   setPixelColorXY(col, rows - 1, curnew);
 }
@@ -356,8 +356,8 @@ void Segment::blur2D(uint8_t blur_amount, bool smear) {
       uint32_t part = color_fade(cur, seep);
       curnew = color_fade(cur, keep);
       if (x > 0) {
-        if (carryover) curnew = color_add(curnew, carryover, true);
-        uint32_t prev = color_add(lastnew, part, true);
+        if (carryover) curnew = color_add(curnew, carryover);
+        uint32_t prev = color_add(lastnew, part);
         // optimization: only set pixel if color has changed
         if (last != prev) setPixelColorXY(x - 1, row, prev);
       } else setPixelColorXY(x, row, curnew); // first pixel
@@ -375,14 +375,14 @@ void Segment::blur2D(uint8_t blur_amount, bool smear) {
       uint32_t part = color_fade(cur, seep);
       curnew = color_fade(cur, keep);
       if (y > 0) {
-        if (carryover) curnew = color_add(curnew, carryover, true);
-        uint32_t prev = color_add(lastnew, part, true);      
+        if (carryover) curnew = color_add(curnew, carryover);
+        uint32_t prev = color_add(lastnew, part);
         // optimization: only set pixel if color has changed
         if (last != prev) setPixelColorXY(col, y - 1, prev);
       } else setPixelColorXY(col, y, curnew); // first pixel
       lastnew = curnew;
       last = cur; //save original value for comparison on next iteration
-      carryover = part;        
+      carryover = part;
     }
     setPixelColorXY(col, rows - 1, curnew);
   }
diff --git a/wled00/FX_fcn.cpp b/wled00/FX_fcn.cpp
index 236f7ad4a..66aeaab63 100644
--- a/wled00/FX_fcn.cpp
+++ b/wled00/FX_fcn.cpp
@@ -712,12 +712,12 @@ void IRAM_ATTR_YN Segment::setPixelColor(int i, uint32_t col)
 {
   if (!isActive()) return; // not active
 #ifndef WLED_DISABLE_2D
-  int vStrip;  
+  int vStrip;
 #endif
   if (i >= virtualLength() || i<0) // pixel would fall out of segment, check if this is a virtual strip NOTE: this is almost always false if not virtual strip, saves the calculation on 'standard' call
   {
     #ifndef WLED_DISABLE_2D
-    vStrip = i>>16; // hack to allow running on virtual strips (2D segment columns/rows)    
+    vStrip = i>>16; // hack to allow running on virtual strips (2D segment columns/rows)
     #endif
     i &= 0xFFFF; //truncate vstrip index
     if (i >= virtualLength() || i<0) return;  // if pixel would still fall out of segment just exit
@@ -735,7 +735,7 @@ void IRAM_ATTR_YN Segment::setPixelColor(int i, uint32_t col)
       case M12_pBar:
         // expand 1D effect vertically or have it play on virtual strips
         if (vStrip>0) setPixelColorXY(vStrip - 1, vH - i - 1, col);
-        else          for (int x = 0; x < vW; x++) setPixelColorXY(x, vH - i - 1, col);
+        else for (int x = 0; x < vW; x++) setPixelColorXY(x, vH - i - 1, col);
         break;
       case M12_pArc:
         // expand in circular fashion from center
@@ -796,7 +796,7 @@ void IRAM_ATTR_YN Segment::setPixelColor(int i, uint32_t col)
         // Odd rays start further from center if prevRay started at center.
         static int prevRay = INT_MIN; // previous ray number
         if ((i % 2 == 1) && (i - 1 == prevRay || i + 1 == prevRay)) {
-          int jump = min(vW/3, vH/3); // can add 2 if using medium pinwheel 
+          int jump = min(vW/3, vH/3); // can add 2 if using medium pinwheel
           posx += inc_x * jump;
           posy += inc_y * jump;
         }
@@ -1145,8 +1145,8 @@ void Segment::blur(uint8_t blur_amount, bool smear) {
     uint32_t part = color_fade(cur, seep);
     curnew = color_fade(cur, keep);
     if (i > 0) {
-      if (carryover) curnew = color_add(curnew, carryover, true);
-      uint32_t prev = color_add(lastnew, part, true);
+      if (carryover) curnew = color_add(curnew, carryover);
+      uint32_t prev = color_add(lastnew, part);
       // optimization: only set pixel if color has changed
       if (last != prev) setPixelColor(i - 1, prev);
     } else // first pixel
@@ -1188,7 +1188,7 @@ uint32_t Segment::color_wheel(uint8_t pos) const {
  * @returns Single color from palette
  */
 uint32_t Segment::color_from_palette(uint16_t i, bool mapping, bool wrap, uint8_t mcol, uint8_t pbri) const {
-  
+
   uint32_t color = currentColor(mcol);
   // default palette or no RGB support on segment
   if ((palette == 0 && mcol < NUM_COLORS) || !_isRGB) {
@@ -1196,7 +1196,7 @@ uint32_t Segment::color_from_palette(uint16_t i, bool mapping, bool wrap, uint8_
     return (pbri == 255) ? color : color_fade(color, pbri, true);
   }
 
-  uint8_t paletteIndex = i;
+  unsigned paletteIndex = i;
   if (mapping && virtualLength() > 1) paletteIndex = (i*255)/(virtualLength() -1);
   // paletteBlend: 0 - wrap when moving, 1 - always wrap, 2 - never wrap, 3 - none (undefined)
   if (!wrap && strip.paletteBlend != 3) paletteIndex = scale8(paletteIndex, 240); //cut off blend at palette "end"
diff --git a/wled00/colors.cpp b/wled00/colors.cpp
index 104d25e60..54469ebe0 100644
--- a/wled00/colors.cpp
+++ b/wled00/colors.cpp
@@ -33,33 +33,32 @@ uint32_t color_blend(uint32_t color1, uint32_t color2, uint16_t blend, bool b16)
 
 /*
  * color add function that preserves ratio
- * idea: https://github.com/Aircoookie/WLED/pull/2465 by https://github.com/Proto-molecule
+ * original idea: https://github.com/Aircoookie/WLED/pull/2465 by https://github.com/Proto-molecule
+ * heavily optimized for speed by @dedehai
  */
-uint32_t color_add(uint32_t c1, uint32_t c2, bool fast)
+uint32_t color_add(uint32_t c1, uint32_t c2)
 {
   if (c1 == BLACK) return c2;
   if (c2 == BLACK) return c1;
-  uint32_t rb = (c1 & 0x00FF00FF) + (c2 & 0x00FF00FF); 
-  uint32_t r = rb >> 16;
-  uint32_t b = rb & 0xFFFF; 
-  uint32_t wg = ((c1>>8) & 0x00FF00FF) + ((c2>>8) & 0x00FF00FF); 
+  uint32_t rb = (c1 & 0x00FF00FF) + (c2 & 0x00FF00FF); // add red and blue in one operation, each sum has its own 16-bit lane
+  uint32_t wg = ((c1>>8) & 0x00FF00FF) + ((c2>>8) & 0x00FF00FF); // same for white and green, shifted down by 8 into the same lanes
+  uint32_t r = rb >> 16; // per-channel sums (0..510), only used to detect overflow
+  uint32_t b = rb & 0xFFFF;
   uint32_t w = wg >> 16;
-  uint32_t g = wg & 0xFFFF; 
+  uint32_t g = wg & 0xFFFF;
 
-  if (fast) {    
-    r = r > 255 ? 255 : r; 
-    g = g > 255 ? 255 : g; 
-    b = b > 255 ? 255 : b; 
-    w = w > 255 ? 255 : w; 
-    return RGBW32(r,g,b,w);
-  } else {
-    unsigned max = r;
-    max = g > max ? g : max;
-    max = b > max ? b : max;
-    max = w > max ? w : max;
-    if (max < 256) return RGBW32(r, g, b, w);
-    else           return RGBW32(r * 255 / max, g * 255 / max, b * 255 / max, w * 255 / max);
+  unsigned max = r; // find the largest channel sum to detect overflow. Note: just clamping each channel at 255 (formerly 'fast') is not any faster (and even slower if nothing overflows)
+  max = g > max ? g : max;
+  max = b > max ? b : max;
+  max = w > max ? w : max;
+
+  if (max > 255) { // at least one channel overflowed: scale all channels by 255/max to preserve the color ratio
+    uint32_t scale = (uint32_t(255)<<8) / max; // dividing the packed two-channel values directly does not work -> multiply by 255/max (shifted up 8 bits) instead; channel * scale <= 255<<8, so each product stays in its lane
+    rb = ((rb * scale) >> 8) & 0x00FF00FF; // scale red and blue at once, shift back and keep the 8-bit result of each lane
+    wg = (wg * scale) & 0xFF00FF00; // white and green belong 8 bits higher, so the shift back is skipped and only the mask is applied
   }
+  else wg = wg << 8; // no overflow: shift white and green back to their correct position
+  return rb | wg; // recombine both channel pairs into one 32-bit color
 }
 
 /*
@@ -70,52 +69,49 @@ uint32_t color_add(uint32_t c1, uint32_t c2, bool fast)
 uint32_t color_fade(uint32_t c1, uint8_t amount, bool video)
 {
   if (c1 == BLACK || amount == 0) return BLACK;
-  if (amount == 255) return c1;  
+  if (amount == 255) return c1; // 255 means no fading: return the color unchanged
   uint32_t scaledcolor; // color order is: W R G B from MSB to LSB
   uint32_t scale = amount; // 32bit for faster calculation
   uint32_t addRemains = 0;
-  if (!video) amount++; // add one for correct scaling using bitshifts
+  if (!video) scale++; // add one for correct scaling using bitshifts; 'scale' (not 'amount') is the value used in the multiplications below
   else { // video scaling: make sure colors do not dim to zero if they started non-zero
-    addRemains = R(c1) ? 0x00010000 : 0;
+    addRemains  = R(c1) ? 0x00010000 : 0; // red: add 1 to the scaled channel if it was non-zero
     addRemains |= G(c1) ? 0x00000100 : 0;
     addRemains |= B(c1) ? 0x00000001 : 0;
     addRemains |= W(c1) ? 0x01000000 : 0;
   }
   uint32_t rb = (((c1 & 0x00FF00FF) * scale) >> 8) & 0x00FF00FF; // scale red and blue
   uint32_t wg = (((c1 & 0xFF00FF00) >> 8) * scale) & 0xFF00FF00; // scale white and green
-  scaledcolor = (rb | wg) + addRemains;  
+  scaledcolor = (rb | wg) + addRemains; // merge both channel pairs; addRemains stays 0 unless video scaling is used
   return scaledcolor;
 }
 
 // 1:1 replacement of fastled function optimized for ESP, slightly faster, more accurate and uses less flash (~ -200bytes)
 CRGB ColorFromPaletteWLED(const CRGBPalette16& pal, unsigned index, uint8_t brightness, TBlendType blendType)
 {
-   if ( blendType == LINEARBLEND_NOWRAP) {
-     //index = map8(index, 0, 239);  
+   if (blendType == LINEARBLEND_NOWRAP) {
      index = (index*240) >> 8; // Blend range is affected by lo4 blend of values, remap to avoid wrapping
    }
     unsigned hi4 = byte(index) >> 4;
-    // We then add that to a base array pointer.
     const CRGB* entry = (CRGB*)( (uint8_t*)(&(pal[0])) + (hi4 * sizeof(CRGB)));
     unsigned red1   = entry->r;
     unsigned green1 = entry->g;
-    unsigned blue1  = entry->b;     
+    unsigned blue1  = entry->b;
     if(blendType != NOBLEND) {
         if(hi4 == 15) entry = &(pal[0]);
         else ++entry;
-       // unsigned red2 = entry->red;      
         unsigned f2 = ((index & 0x0F) << 4) + 1; // +1 so we scale by 256 as a max value, then result can just be shifted by 8
         unsigned f1 = (257 - f2); // f2 is 1 minimum, so this is 256 max
-        red1   = (red1 * f1 + (unsigned)entry->r * f2) >> 8;          
-        green1   = (green1 * f1 + (unsigned)entry->g * f2) >> 8;        
-        blue1   = (blue1 * f1 + (unsigned)entry->b * f2) >> 8;                
+        red1   = (red1 * f1 + (unsigned)entry->r * f2) >> 8; // weighted mix of this entry and the next one; f1 + f2 = 257, so 255 stays 255 after the shift
+        green1 = (green1 * f1 + (unsigned)entry->g * f2) >> 8;
+        blue1  = (blue1 * f1 + (unsigned)entry->b * f2) >> 8;
     }
     if( brightness < 255) { // note: zero checking could be done to return black but that is hardly ever used so it is omitted
-          uint32_t scale = brightness + 1; // adjust for rounding (bitshift)          
+          uint32_t scale = brightness + 1; // adjust for rounding (bitshift): scale is 1..255 here and the results below are divided by 256
           red1   = (red1 * scale) >> 8;
           green1 = (green1 * scale) >> 8;
           blue1  = (blue1 * scale) >> 8;
-    } 
+    }
     return CRGB((uint8_t)red1, (uint8_t)green1, (uint8_t)blue1);
 }
 
@@ -176,7 +172,7 @@ CRGBPalette16 generateHarmonicRandomPalette(CRGBPalette16 &basepalette)
       harmonics[1] = basehue + 205 + random8(10);
       harmonics[2] = basehue -   5 + random8(10);
       break;
-    
+
     case 3: // square
       harmonics[0] = basehue +  85 + random8(10);
       harmonics[1] = basehue + 175 + random8(10);
@@ -213,9 +209,9 @@ CRGBPalette16 generateHarmonicRandomPalette(CRGBPalette16 &basepalette)
   //apply saturation & gamma correction
   CRGB RGBpalettecolors[4];
   for (int i = 0; i < 4; i++) {
-    if (makepastelpalette && palettecolors[i].saturation > 180) { 
+    if (makepastelpalette && palettecolors[i].saturation > 180) {
       palettecolors[i].saturation -= 160; //desaturate all four colors
-    }    
+    }
     RGBpalettecolors[i] = (CRGB)palettecolors[i]; //convert to RGB
     RGBpalettecolors[i] = gamma32(((uint32_t)RGBpalettecolors[i]) & 0x00FFFFFFU); //strip alpha from CRGB
   }
diff --git a/wled00/fcn_declare.h b/wled00/fcn_declare.h
index ac941dc97..be7ed4462 100644
--- a/wled00/fcn_declare.h
+++ b/wled00/fcn_declare.h
@@ -80,7 +80,7 @@ class NeoGammaWLEDMethod {
 #define gamma32(c) NeoGammaWLEDMethod::Correct32(c)
 #define gamma8(c)  NeoGammaWLEDMethod::rawGamma8(c)
 [[gnu::hot]] uint32_t color_blend(uint32_t,uint32_t,uint16_t,bool b16=false);
-[[gnu::hot]] uint32_t color_add(uint32_t,uint32_t, bool fast=false);
+[[gnu::hot]] uint32_t color_add(uint32_t,uint32_t);
 [[gnu::hot]] uint32_t color_fade(uint32_t c1, uint8_t amount, bool video=false);
 [[gnu::hot]] CRGB ColorFromPaletteWLED(const CRGBPalette16 &pal, unsigned index, uint8_t brightness = (uint8_t)255U, TBlendType blendType = LINEARBLEND);
 CRGBPalette16 generateHarmonicRandomPalette(CRGBPalette16 &basepalette);