mirror of
https://github.com/wled/WLED.git
synced 2025-07-21 09:46:32 +00:00
replacement for fastled sqrt16() (#4426)
* added a bitwise-operation-based sqrt16 as a replacement for the FastLED one; it is about 10% slower for numbers smaller than 128 but faster for larger numbers. The speed difference is irrelevant to WLED, but it saves some flash. * updated to 32 bit and improved for typical WLED use - making it 32 bits allows for larger numbers - added another initial condition check for medium-sized numbers - extended the "small number" optimization to larger numbers: the function is currently only used to calculate sqrt(x^2+y^2), which even for small segments is larger than the initially used 64, so optimizing for 1024 makes more sense, although the value is arbitrarily chosen
This commit is contained in:
parent
aab29cb0ab
commit
a421a90e0a
@ -5446,15 +5446,15 @@ uint16_t mode_2Dmetaballs(void) { // Metaballs by Stefan Petrick. Cannot have
|
|||||||
// and add them together with weightening
|
// and add them together with weightening
|
||||||
unsigned dx = abs(x - x1);
|
unsigned dx = abs(x - x1);
|
||||||
unsigned dy = abs(y - y1);
|
unsigned dy = abs(y - y1);
|
||||||
unsigned dist = 2 * sqrt16((dx * dx) + (dy * dy));
|
unsigned dist = 2 * sqrt32_bw((dx * dx) + (dy * dy));
|
||||||
|
|
||||||
dx = abs(x - x2);
|
dx = abs(x - x2);
|
||||||
dy = abs(y - y2);
|
dy = abs(y - y2);
|
||||||
dist += sqrt16((dx * dx) + (dy * dy));
|
dist += sqrt32_bw((dx * dx) + (dy * dy));
|
||||||
|
|
||||||
dx = abs(x - x3);
|
dx = abs(x - x3);
|
||||||
dy = abs(y - y3);
|
dy = abs(y - y3);
|
||||||
dist += sqrt16((dx * dx) + (dy * dy));
|
dist += sqrt32_bw((dx * dx) + (dy * dy));
|
||||||
|
|
||||||
// inverse result
|
// inverse result
|
||||||
int color = dist ? 1000 / dist : 255;
|
int color = dist ? 1000 / dist : 255;
|
||||||
|
@ -679,7 +679,7 @@ uint16_t Segment::virtualLength() const {
|
|||||||
vLen = max(vW,vH); // get the longest dimension
|
vLen = max(vW,vH); // get the longest dimension
|
||||||
break;
|
break;
|
||||||
case M12_pArc:
|
case M12_pArc:
|
||||||
vLen = sqrt16(vH*vH + vW*vW); // use diagonal
|
vLen = sqrt32_bw(vH*vH + vW*vW); // use diagonal
|
||||||
break;
|
break;
|
||||||
case M12_sPinwheel:
|
case M12_sPinwheel:
|
||||||
vLen = getPinwheelLength(vW, vH);
|
vLen = getPinwheelLength(vW, vH);
|
||||||
@ -922,7 +922,7 @@ uint32_t IRAM_ATTR_YN Segment::getPixelColor(int i) const
|
|||||||
break; }
|
break; }
|
||||||
case M12_pArc:
|
case M12_pArc:
|
||||||
if (i >= vW && i >= vH) {
|
if (i >= vW && i >= vH) {
|
||||||
unsigned vI = sqrt16(i*i/2);
|
unsigned vI = sqrt32_bw(i*i/2);
|
||||||
return getPixelColorXY(vI,vI); // use diagonal
|
return getPixelColorXY(vI,vI); // use diagonal
|
||||||
}
|
}
|
||||||
case M12_pCorner:
|
case M12_pCorner:
|
||||||
|
@ -552,6 +552,7 @@ float asin_t(float x);
|
|||||||
template <typename T> T atan_t(T x);
|
template <typename T> T atan_t(T x);
|
||||||
float floor_t(float x);
|
float floor_t(float x);
|
||||||
float fmod_t(float num, float denom);
|
float fmod_t(float num, float denom);
|
||||||
|
uint32_t sqrt32_bw(uint32_t x);
|
||||||
#define sin_t sin_approx
|
#define sin_t sin_approx
|
||||||
#define cos_t cos_approx
|
#define cos_t cos_approx
|
||||||
#define tan_t tan_approx
|
#define tan_t tan_approx
|
||||||
|
@ -220,3 +220,27 @@ float fmod_t(float num, float denom) {
|
|||||||
#endif
|
#endif
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bit-wise integer square root (exact): returns the largest r with r*r <= x.
// Digit-by-digit method that needs only shifts, additions and comparisons.
// @param x  value to take the root of (the full 32-bit range is accepted)
// @return   floor(sqrt(x)); the result never exceeds 65535
uint32_t sqrt32_bw(uint32_t x) {
  uint32_t res = 0;  // root being assembled; shifted right once per iteration until it is in place
  uint32_t num = x;  // remainder not yet accounted for by res
  uint32_t bit;      // current trial bit, always a power of 4 (or 0 when finished)

  // choose a starting power of 4 close to the input so fewer iterations are needed
  if (num < (1U << 10))      bit = 1U << 10; // speed optimization for small numbers < 32^2
  else if (num < (1U << 20)) bit = 1U << 20; // speed optimization for medium numbers < 1024^2
  else                       bit = 1U << 30; // highest power of 4 that fits in 32 bits

  while (bit > num) bit >>= 2; // reduce iterations: skip powers of 4 above the input (reaches 0 for x == 0)

  while (bit != 0) {
    if (num >= res + bit) {
      // trial bit belongs to the root: take it out of the remainder and add it to the result
      num -= res + bit;
      res = (res >> 1) + bit;
    } else {
      res >>= 1;
    }
    bit >>= 2;
  }
  return res;
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user