some improvements to consider

No real difference in FPS, but the code is faster. It is also 160 bytes smaller, meaning it is actually faster.
2025-07-19 00:36:36 +00:00 · 2024-09-11 21:41:42 +02:00 · 2024-09-11 21:41:42 +02:00 · c3f472fbcb
commit c3f472fbcb
parent eae5a74a11
3 changed files with 67 additions and 22 deletions
--- a/wled00/FX_2Dfcn.cpp
+++ b/wled00/FX_2Dfcn.cpp
@ -173,11 +173,6 @@ void IRAM_ATTR_YN Segment::setPixelColorXY(int x, int y, uint32_t col)
  if (!isActive()) return; // not active
  if (x >= virtualWidth() || y >= virtualHeight() || x<0 || y<0) return;  // if pixel would fall out of virtual segment just exit

-  uint8_t _bri_t = currentBri();
-  if (_bri_t < 255) {
-    col = color_fade(col, _bri_t);
-  }
-
  if (reverse  ) x = virtualWidth()  - x - 1;
  if (reverse_y) y = virtualHeight() - y - 1;
  if (transpose) { std::swap(x,y); } // swap X & Y if segment transposed
@ -189,7 +184,11 @@ void IRAM_ATTR_YN Segment::setPixelColorXY(int x, int y, uint32_t col)
  int H = height();
  if (x >= W || y >= H) return;  // if pixel would fall out of segment just exit

-  uint32_t tmpCol = col;
+  uint8_t _bri_t = currentBri(); 
+  if (_bri_t < 255) {
+    col = color_fade(col, _bri_t);
+  }
+
  for (int j = 0; j < grouping; j++) {   // groupping vertically
    for (int g = 0; g < grouping; g++) { // groupping horizontally
      int xX = (x+g), yY = (y+j);
@ -197,21 +196,21 @@ void IRAM_ATTR_YN Segment::setPixelColorXY(int x, int y, uint32_t col)

 #ifndef WLED_DISABLE_MODE_BLEND
      // if blending modes, blend with underlying pixel
-      if (_modeBlend) tmpCol = color_blend(strip.getPixelColorXY(start + xX, startY + yY), col, 0xFFFFU - progress(), true);
+      if (_modeBlend) col = color_blend(strip.getPixelColorXY(start + xX, startY + yY), col, 0xFFFFU - progress(), true);
 #endif

-      strip.setPixelColorXY(start + xX, startY + yY, tmpCol);
+      strip.setPixelColorXY(start + xX, startY + yY, col);

      if (mirror) { //set the corresponding horizontally mirrored pixel
-        if (transpose) strip.setPixelColorXY(start + xX, startY + height() - yY - 1, tmpCol);
-        else           strip.setPixelColorXY(start + width() - xX - 1, startY + yY, tmpCol);
+        if (transpose) strip.setPixelColorXY(start + xX, startY + height() - yY - 1, col);
+        else           strip.setPixelColorXY(start + width() - xX - 1, startY + yY, col);
      }
      if (mirror_y) { //set the corresponding vertically mirrored pixel
-        if (transpose) strip.setPixelColorXY(start + width() - xX - 1, startY + yY, tmpCol);
-        else           strip.setPixelColorXY(start + xX, startY + height() - yY - 1, tmpCol);
+        if (transpose) strip.setPixelColorXY(start + width() - xX - 1, startY + yY, col);
+        else           strip.setPixelColorXY(start + xX, startY + height() - yY - 1, col);
      }
      if (mirror_y && mirror) { //set the corresponding vertically AND horizontally mirrored pixel
-        strip.setPixelColorXY(start + width() - xX - 1, startY + height() - yY - 1, tmpCol);
+        strip.setPixelColorXY(start + width() - xX - 1, startY + height() - yY - 1, col);
      }
    }
  }
--- a/wled00/colors.cpp
+++ b/wled00/colors.cpp
@ -72,25 +72,69 @@ uint32_t color_fade(uint32_t c1, uint8_t amount, bool video)
 {
  if (c1 == BLACK || amount + video == 0) return BLACK;
  uint32_t scaledcolor; // color order is: W R G B from MSB to LSB
-  uint32_t r = R(c1);
-  uint32_t g = G(c1);
-  uint32_t b = B(c1);
-  uint32_t w = W(c1);
  uint32_t scale = amount; // 32bit for faster calculation
  if (video) {
+    uint32_t r = R(c1);
+    uint32_t g = G(c1);
+    uint32_t b = B(c1);
+    uint32_t w = W(c1);
    scaledcolor  = (((r * scale) >> 8) + ((r && scale) ? 1 : 0)) << 16;
    scaledcolor |= (((g * scale) >> 8) + ((g && scale) ? 1 : 0)) << 8;
    scaledcolor |=  ((b * scale) >> 8) + ((b && scale) ? 1 : 0);
    scaledcolor |= (((w * scale) >> 8) + ((w && scale) ? 1 : 0)) << 24;
-  } else {
-    scaledcolor  = ((r * scale) >> 8) << 16;
-    scaledcolor |= ((g * scale) >> 8) << 8;
-    scaledcolor |=  (b * scale) >> 8;
-    scaledcolor |= ((w * scale) >> 8) << 24;
+  } else {  // according to Compiler Explorer, this is 15% faster but cannot be used for video (it's not faster if the assignments are separated)
+   uint32_t r = (((c1&0x00FF0000) * scale) >> 8) & 0x00FF0000;
+   uint32_t g = (((c1&0x0000FF00) * scale) >> 8) & 0x0000FF00;
+   uint32_t b = ((c1&0x000000FF) * scale) >> 8;
+   uint32_t w = (((c1 & 0xFF000000) >> 8) * scale) & 0xFF000000; // Scale w and keep it in position
+   scaledcolor = r | g | b | w;
  }
  return scaledcolor;
 }

+// 1:1 replacement of the FastLED ColorFromPalette() function, optimized for ESP: slightly faster, more accurate and uses less flash (~ -200 bytes). Picks the palette entry selected by the upper nibble of index, optionally blends it with the following entry using the lower nibble, then scales the result by brightness.
+CRGB ColorFromPaletteWLED(const CRGBPalette16& pal, unsigned index, uint8_t brightness, TBlendType blendType)
+{
+   if ( blendType == LINEARBLEND_NOWRAP) {
+     //index = map8(index, 0, 239);  
+     index = (index*240) >> 8; // Blend range is affected by lo4 blend of values; remap 0..255 to 0..239 so hi4 stays below 15 and the blend never wraps around to entry 0
+   }
+    unsigned hi4 = byte(index) >> 4; // upper nibble of the low byte of index: palette entry number (0..15)
+    unsigned lo4 = index & 0x0F; // lower nibble: blend position between this entry and the next one
+    unsigned hi4XsizeofCRGB = hi4 * sizeof(CRGB); // byte offset of the selected entry
+    // We then add that to a base array pointer.
+    const CRGB* entry = (CRGB*)( (uint8_t*)(&(pal[0])) + hi4XsizeofCRGB);
+    unsigned red1   = entry->red;
+    unsigned green1 = entry->green;
+    unsigned blue1  = entry->blue;     
+    if(blendType != NOBLEND) { // blend with the following palette entry
+        if(hi4 == 15) entry = &(pal[0]); // the last entry blends with the first one (wrap around)
+        else ++entry;
+        unsigned red2 = entry->red;
+        unsigned green2 = entry->green;
+        unsigned blue2  = entry->blue;         
+        unsigned f2 = (lo4 << 4)+1; // +1 so we scale by 256 as a max value, then result can just be shifted by 8
+        unsigned f1 = (257 - f2); // f2 is 1 minimum, so this is 256 max (f1 + f2 is always 257)
+        red1   *= f1;
+        green1 *= f1;
+        blue1  *= f1;
+        red2   *= f2;
+        green2 *= f2;
+        blue2  *= f2;
+        red1   = (red1 + red2) >> 8;          // weighted sum of both entries, shifted back down to 8 bits
+        green1 = (green1 + green2) >> 8;        
+        blue1  = (blue1 + blue2) >> 8;
+    }
+    if( brightness != 255) { // note: zero checking could be done to return black but that is hardly ever used so it is omitted
+          uint32_t scale = brightness;
+          scale++; // adjust for rounding (bitshift)
+          red1   = (red1 * scale) >> 8;
+          green1 = (green1 * scale) >> 8;
+          blue1  = (blue1 * scale) >> 8;
+    } 
+    return CRGB((uint8_t)red1, (uint8_t)green1, (uint8_t)blue1);
+}
+
 void setRandomColor(byte* rgb)
 {
  lastRandomIndex = get_random_wheel_index(lastRandomIndex);
--- a/wled00/fcn_declare.h
+++ b/wled00/fcn_declare.h
@ -66,6 +66,7 @@ typedef struct WiFiConfig {
 } wifi_config;

 //colors.cpp
+#define ColorFromPalette ColorFromPaletteWLED // override fastled version
 // similar to NeoPixelBus NeoGammaTableMethod but allows dynamic changes (superseded by NPB::NeoGammaDynamicTableMethod)
 class NeoGammaWLEDMethod {
  public:
@ -81,6 +82,7 @@ class NeoGammaWLEDMethod {
 [[gnu::hot]] uint32_t color_blend(uint32_t,uint32_t,uint16_t,bool b16=false);
 [[gnu::hot]] uint32_t color_add(uint32_t,uint32_t, bool fast=false);
 [[gnu::hot]] uint32_t color_fade(uint32_t c1, uint8_t amount, bool video=false);
+CRGB ColorFromPaletteWLED(const CRGBPalette16 &pal, unsigned index, uint8_t brightness = (uint8_t)255U, TBlendType blendType = LINEARBLEND);
 CRGBPalette16 generateHarmonicRandomPalette(CRGBPalette16 &basepalette);
 CRGBPalette16 generateRandomPalette();
 inline uint32_t colorFromRgbw(byte* rgbw) { return uint32_t((byte(rgbw[3]) << 24) | (byte(rgbw[0]) << 16) | (byte(rgbw[1]) << 8) | (byte(rgbw[2]))); }