shithub: opus

--- a/libcelt/_kiss_fft_guts.h

+++ b/libcelt/_kiss_fft_guts.h

@@ -114,6 +114,20 @@

         do {    DIVSCALAR( (c).r , div);  \

                 DIVSCALAR( (c).i  , div); }while (0)

+#define  C_ADD( res, a,b) /* res = a + b: ADD32 applied to .r and .i separately */ \

+    do {(res).r=ADD32((a).r,(b).r);  (res).i=ADD32((a).i,(b).i); \

+    }while(0)

+#define  C_SUB( res, a,b) /* res = a - b: SUB32 applied to .r and .i separately */ \

+    do {(res).r=SUB32((a).r,(b).r);  (res).i=SUB32((a).i,(b).i); \

+    }while(0)

+#define C_ADDTO( res , a) /* res += a: ADD32 applied to .r and .i separately; res is read and written */ \

+    do {(res).r = ADD32((res).r, (a).r);  (res).i = ADD32((res).i,(a).i);\

+    }while(0)

+#define C_SUBFROM( res , a) /* res -= a: SUB32 on BOTH .r and .i (real part must not use ADD32) */ \

+    do {(res).r = SUB32((res).r,(a).r);  (res).i = SUB32((res).i,(a).i); \

+    }while(0)

 #else /* MIXED_PRECISION */

 #   define sround4( x )  (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> (FRACBITS+2) )

@@ -165,10 +179,13 @@

         (c).i *= (s); }while(0)

 #endif

 #ifndef CHECK_OVERFLOW_OP

 #  define CHECK_OVERFLOW_OP(a,op,b) /* noop */

 #endif

+#ifndef C_ADD

 #define  C_ADD( res, a,b)\

     do { \

 	    CHECK_OVERFLOW_OP((a).r,+,(b).r)\

@@ -194,7 +211,7 @@

 	    CHECK_OVERFLOW_OP((res).i,-,(a).i)\

 	    (res).r -= (a).r;  (res).i -= (a).i; \

     }while(0)

+#endif /* !defined(C_ADD) */

 #ifdef FIXED_POINT

 /*#  define KISS_FFT_COS(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))

--- a/libcelt/bands.c

+++ b/libcelt/bands.c

@@ -69,8 +69,8 @@

             int shift = celt_ilog2(maxval)-10;

             j=eBands[i]; do {

-               sum += MULT16_16(EXTRACT16(VSHR32(X[j*C+c],shift)),

-                                EXTRACT16(VSHR32(X[j*C+c],shift)));

+               sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j*C+c],shift)),

+                                   EXTRACT16(VSHR32(X[j*C+c],shift)));

             } while (++j<eBands[i+1]);

             /* We're adding one here to make damn sure we never end up with a pitch vector that's

                larger than unity norm */

@@ -250,8 +250,8 @@

    for (j=0;j<len;j++)

       X[j] = X[2*j];

-      E += MULT16_16(X[j],X[j]);

-      E2 += MULT16_16(X[2*j+1],X[2*j+1]);

+      E = MAC16_16(E, X[j],X[j]);

+      E2 = MAC16_16(E2, X[2*j+1],X[2*j+1]);

 #ifndef FIXED_POINT

    E  = celt_sqrt(E+E2)/celt_sqrt(E);

--- a/libcelt/celt.c

+++ b/libcelt/celt.c

@@ -617,8 +617,8 @@

          int j;

          for (j=0;j<N;j++)

-            celt_sig_t tmp = ADD32(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],

-                                   MULT16_32_Q15(preemph,st->preemph_memD[c]));

+            celt_sig_t tmp = MAC16_32_Q15(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],

+                                   preemph,st->preemph_memD[c]);

             st->preemph_memD[c] = tmp;

             pcm[C*j+c] = SCALEOUT(SIG2WORD16(tmp));

@@ -825,8 +825,8 @@

       int j;

       for (j=0;j<N;j++)

-         celt_sig_t tmp = ADD32(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],

-                                MULT16_32_Q15(preemph,st->preemph_memD[c]));

+         celt_sig_t tmp = MAC16_32_Q15(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],

+                                preemph,st->preemph_memD[c]);

          st->preemph_memD[c] = tmp;

          pcm[C*j+c] = SCALEOUT(SIG2WORD16(tmp));

@@ -994,8 +994,8 @@

       int j;

       for (j=0;j<N;j++)

-         celt_sig_t tmp = ADD32(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],

-                                MULT16_32_Q15(preemph,st->preemph_memD[c]));

+         celt_sig_t tmp = MAC16_32_Q15(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],

+                                preemph,st->preemph_memD[c]);

          st->preemph_memD[c] = tmp;

          pcm[C*j+c] = SCALEOUT(SIG2WORD16(tmp));

--- a/libcelt/fixed_debug.h

+++ b/libcelt/fixed_debug.h

@@ -373,7 +373,7 @@

 #define MULT16_32_Q14(a,b) MULT16_32_QX(a,b,14)

 #define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15)

 #define MULT16_32_P15(a,b) MULT16_32_PX(a,b,15)

-#define MAC16_32_Q15(c,a,b) ADD32((c),MULT16_32_Q15((a),(b)))

+#define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b)))) /* ADD32 of c and MULT16_32_Q15(a,b); first takes 2 off celt_mips -- presumably so the fused op is counted cheaper than the two nested ops, TODO confirm */

 static inline int SATURATE(int a, int b)

--- a/libcelt/vq.c

+++ b/libcelt/vq.c

@@ -45,6 +45,7 @@

    int i;

    celt_word32_t Ryp, Ryy, Rpp;

+   celt_word16_t ryp, ryy, rpp;

    celt_word32_t g;

    VARDECL(celt_norm_t, y);

 #ifdef FIXED_POINT

@@ -74,16 +75,16 @@

       Ryy = MAC16_16(Ryy, y[i], y[i]);

    } while (++i < N);

+   ryp = ROUND16(Ryp,14);

+   ryy = ROUND16(Ryy,14);

+   rpp = ROUND16(Rpp,14);

    /* g = (sqrt(Ryp^2 + Ryy - Rpp*Ryy)-Ryp)/Ryy */

-   g = MULT16_32_Q15(

-            celt_sqrt(MULT16_16(ROUND16(Ryp,14),ROUND16(Ryp,14)) + Ryy -

-                      MULT16_16(ROUND16(Ryy,14),ROUND16(Rpp,14)))

-            - ROUND16(Ryp,14),

-       celt_rcp(SHR32(Ryy,9)));

+   g = MULT16_32_Q15(celt_sqrt(MAC16_16(Ryy, ryp,ryp) - MULT16_16(ryy,rpp)) - ryp,

+                     celt_rcp(SHR32(Ryy,9)));

    i=0;

do

-      X[i] = P[i] + ROUND16(MULT16_16(y[i], g),11);

+      X[i] = ADD16(P[i], ROUND16(MULT16_16(y[i], g),11));

    while (++i < N);

    RESTORE_STACK;

@@ -94,7 +95,7 @@

    VARDECL(celt_norm_t, y);

    VARDECL(int, iy);

-   VARDECL(int, signx);

+   VARDECL(celt_word16_t, signx);

    int j, is;

    celt_word16_t s;

    int pulsesLeft;

@@ -113,7 +114,7 @@

    ALLOC(y, N, celt_norm_t);

    ALLOC(iy, N, int);

-   ALLOC(signx, N, int);

+   ALLOC(signx, N, celt_word16_t);

    N_1 = 512/N;

    sum = 0;

@@ -154,7 +155,7 @@

       best_id = 0;

       /* The squared magnitude term gets added anyway, so we might as well

          add it outside the loop */

-      yy = ADD32(yy, MULT16_16(magnitude,magnitude));

+      yy = MAC16_16(yy, magnitude,magnitude);

       /* Choose between fast and accurate strategy depending on where we are in the search */

       if (pulsesLeft>1)

@@ -165,11 +166,11 @@

          do {

             celt_word16_t Rxy, Ryy;

             /* Select sign based on X[j] alone */

-            s = signx[j]*magnitude;

+            s = MULT16_16(signx[j],magnitude);

             /* Temporary sums of the new pulse(s) */

-            Rxy = EXTRACT16(SHR32(xy + MULT16_16(s,X[j]),rshift));

+            Rxy = EXTRACT16(SHR32(MAC16_16(xy, s,X[j]),rshift));

             /* We're multiplying y[j] by two so we don't have to do it here */

-            Ryy = EXTRACT16(SHR32(yy + MULT16_16(s,y[j]),rshift));

+            Ryy = EXTRACT16(SHR32(MAC16_16(yy, s,y[j]),rshift));

             /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that

                Rxy is positive because the sign is pre-computed) */

@@ -193,12 +194,12 @@

             celt_word16_t Rxy, Ryy, Ryp;

             celt_word16_t num;

             /* Select sign based on X[j] alone */

-            s = signx[j]*magnitude;

+            s = MULT16_16(signx[j],magnitude);

             /* Temporary sums of the new pulse(s) */

-            Rxy = ROUND16(xy + MULT16_16(s,X[j]), 14);

+            Rxy = ROUND16(MAC16_16(xy, s,X[j]), 14);

             /* We're multiplying y[j] by two so we don't have to do it here */

-            Ryy = ROUND16(yy + MULT16_16(s,y[j]), 14);

-            Ryp = ROUND16(yp + MULT16_16(s,P[j]), 14);

+            Ryy = ROUND16(MAC16_16(yy, s,y[j]), 14);

+            Ryp = ROUND16(MAC16_16(yp, s,P[j]), 14);

             /* Compute the gain such that ||p + g*y|| = 1

                ...but instead, we compute g*Ryy to avoid dividing */

@@ -222,7 +223,7 @@

       j = best_id;

-      is = signx[j]*pulsesAtOnce;

+      is = MULT16_16(signx[j],pulsesAtOnce);

       s = SHL16(is, yshift);

       /* Updating the sums of the new pulse(s) */