fix shine macros (#17725)

This commit is contained in:
gemu 2023-01-17 09:06:20 +01:00 committed by GitHub
parent d904e0aa7f
commit beb021210d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 55 additions and 55 deletions

View File

@ -138,7 +138,7 @@ void shine_iteration_loop(shine_global_config *config) {
*/ */
for (i=GRANULE_SIZE, config->l3loop->xrmax=0; i--;) for (i=GRANULE_SIZE, config->l3loop->xrmax=0; i--;)
{ {
config->l3loop->xrsq[i] = mulsr(config->l3loop->xr[i],config->l3loop->xr[i]); config->l3loop->xrsq[i] = asm_mulsr(config->l3loop->xr[i],config->l3loop->xr[i]);
config->l3loop->xrabs[i] = abs(config->l3loop->xr[i]); config->l3loop->xrabs[i] = abs(config->l3loop->xr[i]);
if(config->l3loop->xrabs[i]>config->l3loop->xrmax) if(config->l3loop->xrabs[i]>config->l3loop->xrmax)
config->l3loop->xrmax=config->l3loop->xrabs[i]; config->l3loop->xrmax=config->l3loop->xrabs[i];
@ -408,7 +408,7 @@ int quantize(int ix[GRANULE_SIZE], int stepsize, shine_global_config *config )
/* a quick check to see if ixmax will be less than 8192 */ /* a quick check to see if ixmax will be less than 8192 */
/* this speeds up the early calls to bin_search_StepSize */ /* this speeds up the early calls to bin_search_StepSize */
if((mulr(config->l3loop->xrmax,scalei)) > 165140) /* 8192**(4/3) */ if((asm_mulr(config->l3loop->xrmax,scalei)) > 165140) /* 8192**(4/3) */
max = 16384; /* no point in continuing, stepsize not big enough */ max = 16384; /* no point in continuing, stepsize not big enough */
else else
for(i=0, max=0;i<GRANULE_SIZE;i++) for(i=0, max=0;i<GRANULE_SIZE;i++)
@ -416,7 +416,7 @@ int quantize(int ix[GRANULE_SIZE], int stepsize, shine_global_config *config )
/* This calculation is very sensitive. The multiply must round its /* This calculation is very sensitive. The multiply must round its
* result or bad things happen to the quality. * result or bad things happen to the quality.
*/ */
ln = mulr(abs(config->l3loop->xr[i]),scalei); ln = asm_mulr(abs(config->l3loop->xr[i]),scalei);
if(ln<10000) /* ln < 10000 catches most values */ if(ln<10000) /* ln < 10000 catches most values */
ix[i] = config->l3loop->int2idx[ln]; /* quick look up method */ ix[i] = config->l3loop->int2idx[ln]; /* quick look up method */

View File

@ -98,32 +98,32 @@ void shine_mdct_sub(shine_global_config *config, int stride) {
uint32_t vm_lo __attribute__((unused)); uint32_t vm_lo __attribute__((unused));
#endif #endif
mul0(vm, vm_lo, mdct_in[35], config->mdct.cos_l[k][35]); asm_mul0(vm, vm_lo, mdct_in[35], config->mdct.cos_l[k][35]);
for(j=35; j; j-=7) { for(j=35; j; j-=7) {
muladd(vm, vm_lo, mdct_in[j-1], config->mdct.cos_l[k][j-1]); asm_muladd(vm, vm_lo, mdct_in[j-1], config->mdct.cos_l[k][j-1]);
muladd(vm, vm_lo, mdct_in[j-2], config->mdct.cos_l[k][j-2]); asm_muladd(vm, vm_lo, mdct_in[j-2], config->mdct.cos_l[k][j-2]);
muladd(vm, vm_lo, mdct_in[j-3], config->mdct.cos_l[k][j-3]); asm_muladd(vm, vm_lo, mdct_in[j-3], config->mdct.cos_l[k][j-3]);
muladd(vm, vm_lo, mdct_in[j-4], config->mdct.cos_l[k][j-4]); asm_muladd(vm, vm_lo, mdct_in[j-4], config->mdct.cos_l[k][j-4]);
muladd(vm, vm_lo, mdct_in[j-5], config->mdct.cos_l[k][j-5]); asm_muladd(vm, vm_lo, mdct_in[j-5], config->mdct.cos_l[k][j-5]);
muladd(vm, vm_lo, mdct_in[j-6], config->mdct.cos_l[k][j-6]); asm_muladd(vm, vm_lo, mdct_in[j-6], config->mdct.cos_l[k][j-6]);
muladd(vm, vm_lo, mdct_in[j-7], config->mdct.cos_l[k][j-7]); asm_muladd(vm, vm_lo, mdct_in[j-7], config->mdct.cos_l[k][j-7]);
} }
mulz(vm, vm_lo); asm_mulz(vm, vm_lo);
mdct_enc[band][k] = vm; mdct_enc[band][k] = vm;
} }
/* Perform aliasing reduction butterfly */ /* Perform aliasing reduction butterfly */
asm ("#cmuls:"); asm ("#asm_cmuls:");
if (band != 0) if (band != 0)
{ {
cmuls(mdct_enc[band][0], mdct_enc[band-1][17-0], mdct_enc[band][0], mdct_enc[band-1][17-0], MDCT_CS0, MDCT_CA0); asm_cmuls(mdct_enc[band][0], mdct_enc[band-1][17-0], mdct_enc[band][0], mdct_enc[band-1][17-0], MDCT_CS0, MDCT_CA0);
cmuls(mdct_enc[band][1], mdct_enc[band-1][17-1], mdct_enc[band][1], mdct_enc[band-1][17-1], MDCT_CS1, MDCT_CA1); asm_cmuls(mdct_enc[band][1], mdct_enc[band-1][17-1], mdct_enc[band][1], mdct_enc[band-1][17-1], MDCT_CS1, MDCT_CA1);
cmuls(mdct_enc[band][2], mdct_enc[band-1][17-2], mdct_enc[band][2], mdct_enc[band-1][17-2], MDCT_CS2, MDCT_CA2); asm_cmuls(mdct_enc[band][2], mdct_enc[band-1][17-2], mdct_enc[band][2], mdct_enc[band-1][17-2], MDCT_CS2, MDCT_CA2);
cmuls(mdct_enc[band][3], mdct_enc[band-1][17-3], mdct_enc[band][3], mdct_enc[band-1][17-3], MDCT_CS3, MDCT_CA3); asm_cmuls(mdct_enc[band][3], mdct_enc[band-1][17-3], mdct_enc[band][3], mdct_enc[band-1][17-3], MDCT_CS3, MDCT_CA3);
cmuls(mdct_enc[band][4], mdct_enc[band-1][17-4], mdct_enc[band][4], mdct_enc[band-1][17-4], MDCT_CS4, MDCT_CA4); asm_cmuls(mdct_enc[band][4], mdct_enc[band-1][17-4], mdct_enc[band][4], mdct_enc[band-1][17-4], MDCT_CS4, MDCT_CA4);
cmuls(mdct_enc[band][5], mdct_enc[band-1][17-5], mdct_enc[band][5], mdct_enc[band-1][17-5], MDCT_CS5, MDCT_CA5); asm_cmuls(mdct_enc[band][5], mdct_enc[band-1][17-5], mdct_enc[band][5], mdct_enc[band-1][17-5], MDCT_CS5, MDCT_CA5);
cmuls(mdct_enc[band][6], mdct_enc[band-1][17-6], mdct_enc[band][6], mdct_enc[band-1][17-6], MDCT_CS6, MDCT_CA6); asm_cmuls(mdct_enc[band][6], mdct_enc[band-1][17-6], mdct_enc[band][6], mdct_enc[band-1][17-6], MDCT_CS6, MDCT_CA6);
cmuls(mdct_enc[band][7], mdct_enc[band-1][17-7], mdct_enc[band][7], mdct_enc[band-1][17-7], MDCT_CS7, MDCT_CA7); asm_cmuls(mdct_enc[band][7], mdct_enc[band-1][17-7], mdct_enc[band][7], mdct_enc[band-1][17-7], MDCT_CS7, MDCT_CA7);
} }
} }
} }

View File

@ -66,15 +66,15 @@ void shine_window_filter_subband(int16_t **buffer, int32_t s[SBLIMIT], int ch, s
uint32_t s_value_lo __attribute__((unused)); uint32_t s_value_lo __attribute__((unused));
#endif #endif
mul0 (s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (0<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (0<<6)]); asm_mul0 (s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (0<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (0<<6)]);
muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (1<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (1<<6)]); asm_muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (1<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (1<<6)]);
muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (2<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (2<<6)]); asm_muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (2<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (2<<6)]);
muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (3<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (3<<6)]); asm_muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (3<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (3<<6)]);
muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (4<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (4<<6)]); asm_muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (4<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (4<<6)]);
muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (5<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (5<<6)]); asm_muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (5<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (5<<6)]);
muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (6<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (6<<6)]); asm_muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (6<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (6<<6)]);
muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (7<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (7<<6)]); asm_muladd(s_value, s_value_lo, config->subband.x[ch][(config->subband.off[ch] + i + (7<<6)) & (HAN_SIZE-1)], shine_enwindow[i + (7<<6)]);
mulz (s_value, s_value_lo); asm_mulz (s_value, s_value_lo);
y[i] = s_value; y[i] = s_value;
} }
@ -88,17 +88,17 @@ void shine_window_filter_subband(int16_t **buffer, int32_t s[SBLIMIT], int ch, s
uint32_t s_value_lo __attribute__((unused)); uint32_t s_value_lo __attribute__((unused));
#endif #endif
mul0(s_value, s_value_lo, config->subband.fl[i][63], y[63]); asm_mul0(s_value, s_value_lo, config->subband.fl[i][63], y[63]);
for (j=63; j; j-=7) { for (j=63; j; j-=7) {
muladd(s_value, s_value_lo, config->subband.fl[i][j-1], y[j-1]); asm_muladd(s_value, s_value_lo, config->subband.fl[i][j-1], y[j-1]);
muladd(s_value, s_value_lo, config->subband.fl[i][j-2], y[j-2]); asm_muladd(s_value, s_value_lo, config->subband.fl[i][j-2], y[j-2]);
muladd(s_value, s_value_lo, config->subband.fl[i][j-3], y[j-3]); asm_muladd(s_value, s_value_lo, config->subband.fl[i][j-3], y[j-3]);
muladd(s_value, s_value_lo, config->subband.fl[i][j-4], y[j-4]); asm_muladd(s_value, s_value_lo, config->subband.fl[i][j-4], y[j-4]);
muladd(s_value, s_value_lo, config->subband.fl[i][j-5], y[j-5]); asm_muladd(s_value, s_value_lo, config->subband.fl[i][j-5], y[j-5]);
muladd(s_value, s_value_lo, config->subband.fl[i][j-6], y[j-6]); asm_muladd(s_value, s_value_lo, config->subband.fl[i][j-6], y[j-6]);
muladd(s_value, s_value_lo, config->subband.fl[i][j-7], y[j-7]); asm_muladd(s_value, s_value_lo, config->subband.fl[i][j-7], y[j-7]);
} }
mulz(s_value, s_value_lo); asm_mulz(s_value, s_value_lo);
s[i] = s_value; s[i] = s_value;
} }
} }

View File

@ -1,9 +1,9 @@
#include <stdint.h> #include <stdint.h>
#ifndef mul #ifndef asm_mul
//#define /// mul(a,b) (int32_t) ( ( ((int64_t) a) * ((int64_t) b) ) >>32 ) //#define /// mul(a,b) (int32_t) ( ( ((int64_t) a) * ((int64_t) b) ) >>32 )
#define mul(x,y) \ #define asm_mul(x,y) \
({ \ ({ \
register int32_t result; \ register int32_t result; \
asm ("mulsh %0, %2, %1" : "=r" (result) : "r" (x), "r" (y)); \ asm ("mulsh %0, %2, %1" : "=r" (result) : "r" (x), "r" (y)); \
@ -12,8 +12,8 @@
#endif #endif
#ifndef muls //Not sure about this #ifndef asm_muls //Not sure about this
#define muls(x,y) \ #define asm_muls(x,y) \
({ \ ({ \
register int32_t result; \ register int32_t result; \
asm ( \ asm ( \
@ -28,8 +28,8 @@
//#define muls(a,b) (int32_t) ( ( ((int64_t) a) * ((int64_t) b) ) >>31 ) //#define muls(a,b) (int32_t) ( ( ((int64_t) a) * ((int64_t) b) ) >>31 )
#endif #endif
#ifndef mulr //no rounding shortcut #ifndef asm_mulr //no rounding shortcut
#define mulr(x,y) \ #define asm_mulr(x,y) \
({ \ ({ \
register int32_t result; \ register int32_t result; \
asm ("mulsh %0, %2, %1" : "=r" (result) : "r" (x), "r" (y)); \ asm ("mulsh %0, %2, %1" : "=r" (result) : "r" (x), "r" (y)); \
@ -39,8 +39,8 @@
//#define mulr(a,b) (int32_t) ( ( ( ((int64_t) a) * ((int64_t) b)) + 0x80000000LL ) >>32 ) //#define mulr(a,b) (int32_t) ( ( ( ((int64_t) a) * ((int64_t) b)) + 0x80000000LL ) >>32 )
#endif #endif
#ifndef mulsr //no rounding shortcut #ifndef asm_mulsr //no rounding shortcut
#define mulsr(x,y) \ #define asm_mulsr(x,y) \
({ \ ({ \
register int32_t result; \ register int32_t result; \
asm ( \ asm ( \
@ -53,11 +53,11 @@
//#define mulsr(a,b) (int32_t) ( ( ( ((int64_t) a) * ((int64_t) b)) + 0x40000000LL ) >>31 ) //#define mulsr(a,b) (int32_t) ( ( ( ((int64_t) a) * ((int64_t) b)) + 0x40000000LL ) >>31 )
#endif #endif
#ifndef mul0 #ifndef asm_mul0
#define mul0(hi,lo,a,b) ((hi) = mul((a), (b))) #define asm_mul0(hi,lo,a,b) ((hi) = asm_mul((a), (b)))
// This didn't seem to help either // This didn't seem to help either
#define muladd(hi, lo, x, y) \ #define asm_muladd(hi, lo, x, y) \
({ \ ({ \
asm ( \ asm ( \
"mulsh a7, %2, %1\n\t" \ "mulsh a7, %2, %1\n\t" \
@ -67,8 +67,8 @@
}) })
//#define muladd(hi,lo,a,b) ((hi) += mul((a), (b))) //#define asm_muladd(hi,lo,a,b) ((hi) += mul((a), (b)))
#define mulsub(hi, lo, x, y) \ #define asm_mulsub(hi, lo, x, y) \
({ \ ({ \
asm ( \ asm ( \
"mulsh a8, %2, %1\n\t" \ "mulsh a8, %2, %1\n\t" \
@ -77,10 +77,10 @@
: "a8");\ : "a8");\
}) })
//#define mulsub(hi,lo,a,b) ((hi) -= mul((a), (b))) //#define mulsub(hi,lo,a,b) ((hi) -= mul((a), (b)))
#define mulz(hi,lo) #define asm_mulz(hi,lo)
#endif #endif
#ifndef cmuls #ifndef asm_cmuls
/* /*
#define cmuls(dre, dim, are, aim, bre, bim) \ #define cmuls(dre, dim, are, aim, bre, bim) \
do { \ do { \
@ -109,7 +109,7 @@ do { \
} while (0)*/ } while (0)*/
#define cmuls(dre, dim, are, aim, bre, bim) \ #define asm_cmuls(dre, dim, are, aim, bre, bim) \
do { \ do { \
int32_t tre; \ int32_t tre; \
(tre) = (int32_t) (((int64_t) (are) * (int64_t) (bre) - (int64_t) (aim) * (int64_t) (bim)) >> 31); \ (tre) = (int32_t) (((int64_t) (are) * (int64_t) (bre) - (int64_t) (aim) * (int64_t) (bim)) >> 31); \