ref: 1dab60cc91339f977b39db4849417483d5b3c15b
parent: b781877e80052b317e8856044d87c8e6d8deec41
author: Jean-Marc Valin <[email protected]>
date: Tue Sep 16 09:29:37 EDT 2008
Better use of the arithmetic operators
--- a/libcelt/_kiss_fft_guts.h
+++ b/libcelt/_kiss_fft_guts.h
@@ -114,6 +114,20 @@
do { DIVSCALAR( (c).r , div); \
DIVSCALAR( (c).i , div); }while (0)
+#define C_ADD( res, a,b) /* res = a + b on both .r and .i, using ADD32 */\
+ do {(res).r=ADD32((a).r,(b).r); (res).i=ADD32((a).i,(b).i); \
+ }while(0)
+#define C_SUB( res, a,b) /* res = a - b on both .r and .i, using SUB32 */\
+ do {(res).r=SUB32((a).r,(b).r); (res).i=SUB32((a).i,(b).i); \
+ }while(0)
+#define C_ADDTO( res , a) /* res += a on both .r and .i, using ADD32 */\
+ do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\
+ }while(0)
+
+#define C_SUBFROM( res , a) /* res -= a on both .r and .i; must match the generic C_SUBFROM */\
+ do {(res).r = SUB32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \
+ }while(0)
+
#else /* MIXED_PRECISION */
# define sround4( x ) (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> (FRACBITS+2) )
@@ -165,10 +179,13 @@
(c).i *= (s); }while(0)
#endif
+
+
#ifndef CHECK_OVERFLOW_OP
# define CHECK_OVERFLOW_OP(a,op,b) /* noop */
#endif
+#ifndef C_ADD
#define C_ADD( res, a,b)\
do { \
CHECK_OVERFLOW_OP((a).r,+,(b).r)\
@@ -194,7 +211,7 @@
CHECK_OVERFLOW_OP((res).i,-,(a).i)\
(res).r -= (a).r; (res).i -= (a).i; \
}while(0)
-
+#endif /* C_ADD defined */
#ifdef FIXED_POINT
/*# define KISS_FFT_COS(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
--- a/libcelt/bands.c
+++ b/libcelt/bands.c
@@ -69,8 +69,8 @@
{
int shift = celt_ilog2(maxval)-10;
j=eBands[i]; do {
- sum += MULT16_16(EXTRACT16(VSHR32(X[j*C+c],shift)),
- EXTRACT16(VSHR32(X[j*C+c],shift)));
+ sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j*C+c],shift)),
+ EXTRACT16(VSHR32(X[j*C+c],shift)));
} while (++j<eBands[i+1]);
/* We're adding one here to make damn sure we never end up with a pitch vector that's
larger than unity norm */
@@ -250,8 +250,8 @@
for (j=0;j<len;j++)
{
X[j] = X[2*j];
- E += MULT16_16(X[j],X[j]);
- E2 += MULT16_16(X[2*j+1],X[2*j+1]);
+ E = MAC16_16(E, X[j],X[j]);
+ E2 = MAC16_16(E2, X[2*j+1],X[2*j+1]);
}
#ifndef FIXED_POINT
E = celt_sqrt(E+E2)/celt_sqrt(E);
--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@@ -617,8 +617,8 @@
int j;
for (j=0;j<N;j++)
{
- celt_sig_t tmp = ADD32(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
- MULT16_32_Q15(preemph,st->preemph_memD[c]));
+ celt_sig_t tmp = MAC16_32_Q15(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
+ preemph,st->preemph_memD[c]);
st->preemph_memD[c] = tmp;
pcm[C*j+c] = SCALEOUT(SIG2WORD16(tmp));
}
@@ -825,8 +825,8 @@
int j;
for (j=0;j<N;j++)
{
- celt_sig_t tmp = ADD32(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
- MULT16_32_Q15(preemph,st->preemph_memD[c]));
+ celt_sig_t tmp = MAC16_32_Q15(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
+ preemph,st->preemph_memD[c]);
st->preemph_memD[c] = tmp;
pcm[C*j+c] = SCALEOUT(SIG2WORD16(tmp));
}
@@ -994,8 +994,8 @@
int j;
for (j=0;j<N;j++)
{
- celt_sig_t tmp = ADD32(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
- MULT16_32_Q15(preemph,st->preemph_memD[c]));
+ celt_sig_t tmp = MAC16_32_Q15(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
+ preemph,st->preemph_memD[c]);
st->preemph_memD[c] = tmp;
pcm[C*j+c] = SCALEOUT(SIG2WORD16(tmp));
}
--- a/libcelt/fixed_debug.h
+++ b/libcelt/fixed_debug.h
@@ -373,7 +373,7 @@
#define MULT16_32_Q14(a,b) MULT16_32_QX(a,b,14)
#define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15)
#define MULT16_32_P15(a,b) MULT16_32_PX(a,b,15)
-#define MAC16_32_Q15(c,a,b) ADD32((c),MULT16_32_Q15((a),(b)))
+#define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b))))
static inline int SATURATE(int a, int b)
{
--- a/libcelt/vq.c
+++ b/libcelt/vq.c
@@ -45,6 +45,7 @@
{
int i;
celt_word32_t Ryp, Ryy, Rpp;
+ celt_word16_t ryp, ryy, rpp;
celt_word32_t g;
VARDECL(celt_norm_t, y);
#ifdef FIXED_POINT
@@ -74,16 +75,16 @@
Ryy = MAC16_16(Ryy, y[i], y[i]);
} while (++i < N);
+ ryp = ROUND16(Ryp,14);
+ ryy = ROUND16(Ryy,14);
+ rpp = ROUND16(Rpp,14);
/* g = (sqrt(Ryp^2 + Ryy - Rpp*Ryy)-Ryp)/Ryy */
- g = MULT16_32_Q15(
- celt_sqrt(MULT16_16(ROUND16(Ryp,14),ROUND16(Ryp,14)) + Ryy -
- MULT16_16(ROUND16(Ryy,14),ROUND16(Rpp,14)))
- - ROUND16(Ryp,14),
- celt_rcp(SHR32(Ryy,9)));
+ g = MULT16_32_Q15(celt_sqrt(MAC16_16(Ryy, ryp,ryp) - MULT16_16(ryy,rpp)) - ryp,
+ celt_rcp(SHR32(Ryy,9)));
i=0;
do
- X[i] = P[i] + ROUND16(MULT16_16(y[i], g),11);
+ X[i] = ADD16(P[i], ROUND16(MULT16_16(y[i], g),11));
while (++i < N);
RESTORE_STACK;
@@ -94,7 +95,7 @@
{
VARDECL(celt_norm_t, y);
VARDECL(int, iy);
- VARDECL(int, signx);
+ VARDECL(celt_word16_t, signx);
int j, is;
celt_word16_t s;
int pulsesLeft;
@@ -113,7 +114,7 @@
ALLOC(y, N, celt_norm_t);
ALLOC(iy, N, int);
- ALLOC(signx, N, int);
+ ALLOC(signx, N, celt_word16_t);
N_1 = 512/N;
sum = 0;
@@ -154,7 +155,7 @@
best_id = 0;
/* The squared magnitude term gets added anyway, so we might as well
add it outside the loop */
- yy = ADD32(yy, MULT16_16(magnitude,magnitude));
+ yy = MAC16_16(yy, magnitude,magnitude);
/* Choose between fast and accurate strategy depending on where we are in the search */
if (pulsesLeft>1)
{
@@ -165,11 +166,11 @@
do {
celt_word16_t Rxy, Ryy;
/* Select sign based on X[j] alone */
- s = signx[j]*magnitude;
+ s = MULT16_16(signx[j],magnitude);
/* Temporary sums of the new pulse(s) */
- Rxy = EXTRACT16(SHR32(xy + MULT16_16(s,X[j]),rshift));
+ Rxy = EXTRACT16(SHR32(MAC16_16(xy, s,X[j]),rshift));
/* We're multiplying y[j] by two so we don't have to do it here */
- Ryy = EXTRACT16(SHR32(yy + MULT16_16(s,y[j]),rshift));
+ Ryy = EXTRACT16(SHR32(MAC16_16(yy, s,y[j]),rshift));
/* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
Rxy is positive because the sign is pre-computed) */
@@ -193,12 +194,12 @@
celt_word16_t Rxy, Ryy, Ryp;
celt_word16_t num;
/* Select sign based on X[j] alone */
- s = signx[j]*magnitude;
+ s = MULT16_16(signx[j],magnitude);
/* Temporary sums of the new pulse(s) */
- Rxy = ROUND16(xy + MULT16_16(s,X[j]), 14);
+ Rxy = ROUND16(MAC16_16(xy, s,X[j]), 14);
/* We're multiplying y[j] by two so we don't have to do it here */
- Ryy = ROUND16(yy + MULT16_16(s,y[j]), 14);
- Ryp = ROUND16(yp + MULT16_16(s,P[j]), 14);
+ Ryy = ROUND16(MAC16_16(yy, s,y[j]), 14);
+ Ryp = ROUND16(MAC16_16(yp, s,P[j]), 14);
/* Compute the gain such that ||p + g*y|| = 1
...but instead, we compute g*Ryy to avoid dividing */
@@ -222,7 +223,7 @@
}
j = best_id;
- is = signx[j]*pulsesAtOnce;
+ is = MULT16_16(signx[j],pulsesAtOnce);
s = SHL16(is, yshift);
/* Updating the sums of the new pulse(s) */