shithub: opus

--- a/celt/celt_encoder.c

+++ b/celt/celt_encoder.c

@@ -576,7 +576,7 @@

    *tf_sum = 0;

    for (i=0;i<len;i++)

-      int j, k, N;

+      int k, N;

       int narrow;

       opus_val32 L1, best_L1;

       int best_level=0;

@@ -768,10 +768,8 @@

       /* Compute inter-channel correlation for low frequencies */

       for (i=0;i<8;i++)

-         int j;

-         opus_val32 partial = 0;

-         for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)

-            partial = MAC16_16(partial, X[j], X[N0+j]);

+         opus_val32 partial;

+         partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)], (m->eBands[i+1]-m->eBands[i])<<LM);

          sum = ADD16(sum, EXTRACT16(SHR32(partial, 18)));

       sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);

@@ -779,10 +777,8 @@

       minXC = sum;

       for (i=8;i<intensity;i++)

-         int j;

-         opus_val32 partial = 0;

-         for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)

-            partial = MAC16_16(partial, X[j], X[N0+j]);

+         opus_val32 partial;

+         partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)], (m->eBands[i+1]-m->eBands[i])<<LM);

          minXC = MIN16(minXC, ABS16(EXTRACT16(SHR32(partial, 18))));

       minXC = MIN16(QCONST16(1.f, 10), ABS16(minXC));

--- a/celt/pitch.c

+++ b/celt/pitch.c

@@ -252,7 +252,7 @@

 #endif

 celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)

-   int i,j;

+   int i;

    /*The EDSP version requires that max_pitch is at least 1, and that _x is

       32-bit aligned.

      Since it's hard to put asserts in assembly, put them here.*/

@@ -279,9 +279,8 @@

    /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */

    for (;i<max_pitch;i++)

-      opus_val32 sum = 0;

-      for (j=0;j<len;j++)

-         sum = MAC16_16(sum, _x[j],_y[i+j]);

+      opus_val32 sum;

+      sum = celt_inner_prod(_x, _y+i, len);

       xcorr[i] = sum;

 #ifdef FIXED_POINT

       maxcorr = MAX32(maxcorr, sum);

@@ -361,12 +360,17 @@

 #endif

    for (i=0;i<max_pitch>>1;i++)

-      opus_val32 sum=0;

+      opus_val32 sum;

       xcorr[i] = 0;

       if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)

          continue;

+#ifdef FIXED_POINT

+      sum = 0;

       for (j=0;j<len>>1;j++)

          sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);

+#else

+      sum = celt_inner_prod(x_lp, y+i, len>>1);

+#endif

       xcorr[i] = MAX32(-1, sum);

 #ifdef FIXED_POINT

       maxcorr = MAX32(maxcorr, sum);

@@ -513,13 +517,7 @@

       pg = SHR32(frac_div32(best_xy,best_yy+1),16);

    for (k=0;k<3;k++)

-   {

-      int T1 = T+k-1;

-      xy = 0;

-      for (i=0;i<N;i++)

-         xy = MAC16_16(xy, x[i], x[i-T1]);

-      xcorr[k] = xy;

-   }

+      xcorr[k] = celt_inner_prod(x, x-(T+k-1), N);

    if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))

       offset = 1;

    else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))

--- a/celt/pitch.h

+++ b/celt/pitch.h

@@ -141,6 +141,18 @@

 #endif

+#ifndef OVERRIDE_CELT_INNER_PROD

+static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, const opus_val16 *y,

+      int N)

+{

+   int i;

+   opus_val32 xy=0;

+   for (i=0;i<N;i++)

+      xy = MAC16_16(xy, x[i], y[i]);

+   return xy;

+}

+#endif

 #ifdef FIXED_POINT

 opus_val32

 #else

--- a/celt/vq.c

+++ b/celt/vq.c

@@ -37,6 +37,7 @@

 #include "os_support.h"

 #include "bands.h"

 #include "rate.h"

+#include "pitch.h"

 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)

@@ -350,15 +351,11 @@

 #ifdef FIXED_POINT

    int k;

 #endif

-   opus_val32 E = EPSILON;

+   opus_val32 E;

    opus_val16 g;

    opus_val32 t;

-   celt_norm *xptr = X;

-   for (i=0;i<N;i++)

-   {

-      E = MAC16_16(E, *xptr, *xptr);

-      xptr++;

-   }

+   celt_norm *xptr;

+   E = EPSILON + celt_inner_prod(X, X, N);

 #ifdef FIXED_POINT

    k = celt_ilog2(E)>>1;

 #endif

@@ -393,14 +390,8 @@

          Eside = MAC16_16(Eside, s, s);

    } else {

-      for (i=0;i<N;i++)

-      {

-         celt_norm m, s;

-         m = X[i];

-         s = Y[i];

-         Emid = MAC16_16(Emid, m, m);

-         Eside = MAC16_16(Eside, s, s);

-      }

+      Emid += celt_inner_prod(X, X, N);

+      Eside += celt_inner_prod(Y, Y, N);

    mid = celt_sqrt(Emid);

    side = celt_sqrt(Eside);

--- a/celt/x86/pitch_sse.h

+++ b/celt/x86/pitch_sse.h

@@ -101,6 +101,32 @@

+#define OVERRIDE_CELT_INNER_PROD

+static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, const opus_val16 *y,

+      int N)

+{

+   int i;

+   float xy;

+   __m128 sum;

+   sum = _mm_setzero_ps();

+   /* FIXME: We should probably go 8-way and use 2 sums. */

+   for (i=0;i<N-3;i+=4)

+   {

+      __m128 xi = _mm_loadu_ps(x+i);

+      __m128 yi = _mm_loadu_ps(y+i);

+      sum = _mm_add_ps(sum,_mm_mul_ps(xi, yi));

+   }

+   /* Horizontal sum */

+   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));

+   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));

+   _mm_store_ss(&xy, sum);

+   for (;i<N;i++)

+   {

+      xy = MAC16_16(xy, x[i], y[i]);

+   }

+   return xy;

+}

 #define OVERRIDE_COMB_FILTER_CONST

 static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,

       opus_val16 g10, opus_val16 g11, opus_val16 g12)