shithub: opus

--- a/celt/kiss_fft.c

+++ b/celt/kiss_fft.c

@@ -76,37 +76,6 @@

-static void ki_bfly2(

-                     kiss_fft_cpx * Fout,

-                     const size_t fstride,

-                     const kiss_fft_state *st,

-                     int m,

-                     int N,

-                     int mm

-                    )

-{

-   kiss_fft_cpx * Fout2;

-   const kiss_twiddle_cpx * tw1;

-   kiss_fft_cpx t;

-   int i,j;

-   kiss_fft_cpx * Fout_beg = Fout;

-   for (i=0;i<N;i++)

-   {

-      Fout = Fout_beg + i*mm;

-      Fout2 = Fout + m;

-      tw1 = st->twiddles;

-      for(j=0;j<m;j++)

-      {

-         C_MULC (t,  *Fout2 , *tw1);

-         tw1 += fstride;

-         C_SUB( *Fout2 ,  *Fout , t );

-         C_ADDTO( *Fout ,  t );

-         ++Fout2;

-         ++Fout;

-      }

-   }

-}

 static void kf_bfly4(

                      kiss_fft_cpx * Fout,

                      const size_t fstride,

@@ -152,51 +121,7 @@

-static void ki_bfly4(

-                     kiss_fft_cpx * Fout,

-                     const size_t fstride,

-                     const kiss_fft_state *st,

-                     int m,

-                     int N,

-                     int mm

-                    )

-{

-   const kiss_twiddle_cpx *tw1,*tw2,*tw3;

-   kiss_fft_cpx scratch[6];

-   const size_t m2=2*m;

-   const size_t m3=3*m;

-   int i, j;

-   kiss_fft_cpx * Fout_beg = Fout;

-   for (i=0;i<N;i++)

-   {

-      Fout = Fout_beg + i*mm;

-      tw3 = tw2 = tw1 = st->twiddles;

-      for (j=0;j<m;j++)

-      {

-         C_MULC(scratch[0],Fout[m] , *tw1 );

-         C_MULC(scratch[1],Fout[m2] , *tw2 );

-         C_MULC(scratch[2],Fout[m3] , *tw3 );

-         C_SUB( scratch[5] , *Fout, scratch[1] );

-         C_ADDTO(*Fout, scratch[1]);

-         C_ADD( scratch[3] , scratch[0] , scratch[2] );

-         C_SUB( scratch[4] , scratch[0] , scratch[2] );

-         C_SUB( Fout[m2], *Fout, scratch[3] );

-         tw1 += fstride;

-         tw2 += fstride*2;

-         tw3 += fstride*3;

-         C_ADDTO( *Fout , scratch[3] );

-         Fout[m].r = scratch[5].r - scratch[4].i;

-         Fout[m].i = scratch[5].i + scratch[4].r;

-         Fout[m3].r = scratch[5].r + scratch[4].i;

-         Fout[m3].i = scratch[5].i - scratch[4].r;

-         ++Fout;

-      }

-   }

-}

 #ifndef RADIX_TWO_ONLY

 static void kf_bfly3(

@@ -250,56 +175,7 @@

-static void ki_bfly3(

-                     kiss_fft_cpx * Fout,

-                     const size_t fstride,

-                     const kiss_fft_state *st,

-                     int m,

-                     int N,

-                     int mm

-                    )

-{

-   int i, k;

-   const size_t m2 = 2*m;

-   const kiss_twiddle_cpx *tw1,*tw2;

-   kiss_fft_cpx scratch[5];

-   kiss_twiddle_cpx epi3;

-   kiss_fft_cpx * Fout_beg = Fout;

-   epi3 = st->twiddles[fstride*m];

-   for (i=0;i<N;i++)

-   {

-      Fout = Fout_beg + i*mm;

-      tw1=tw2=st->twiddles;

-      k=m;

-      do{

-         C_MULC(scratch[1],Fout[m] , *tw1);

-         C_MULC(scratch[2],Fout[m2] , *tw2);

-         C_ADD(scratch[3],scratch[1],scratch[2]);

-         C_SUB(scratch[0],scratch[1],scratch[2]);

-         tw1 += fstride;

-         tw2 += fstride*2;

-         Fout[m].r = Fout->r - HALF_OF(scratch[3].r);

-         Fout[m].i = Fout->i - HALF_OF(scratch[3].i);

-         C_MULBYSCALAR( scratch[0] , -epi3.i );

-         C_ADDTO(*Fout,scratch[3]);

-         Fout[m2].r = Fout[m].r + scratch[0].i;

-         Fout[m2].i = Fout[m].i - scratch[0].r;

-         Fout[m].r -= scratch[0].i;

-         Fout[m].i += scratch[0].r;

-         ++Fout;

-      }while(--k);

-   }

-}

 static void kf_bfly5(

                      kiss_fft_cpx * Fout,

                      const size_t fstride,

@@ -368,74 +244,7 @@

-static void ki_bfly5(

-                     kiss_fft_cpx * Fout,

-                     const size_t fstride,

-                     const kiss_fft_state *st,

-                     int m,

-                     int N,

-                     int mm

-                    )

-{

-   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;

-   int i, u;

-   kiss_fft_cpx scratch[13];

-   const kiss_twiddle_cpx * twiddles = st->twiddles;

-   const kiss_twiddle_cpx *tw;

-   kiss_twiddle_cpx ya,yb;

-   kiss_fft_cpx * Fout_beg = Fout;

-   ya = twiddles[fstride*m];

-   yb = twiddles[fstride*2*m];

-   tw=st->twiddles;

-   for (i=0;i<N;i++)

-   {

-      Fout = Fout_beg + i*mm;

-      Fout0=Fout;

-      Fout1=Fout0+m;

-      Fout2=Fout0+2*m;

-      Fout3=Fout0+3*m;

-      Fout4=Fout0+4*m;

-      for ( u=0; u<m; ++u ) {

-         scratch[0] = *Fout0;

-         C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);

-         C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);

-         C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);

-         C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);

-         C_ADD( scratch[7],scratch[1],scratch[4]);

-         C_SUB( scratch[10],scratch[1],scratch[4]);

-         C_ADD( scratch[8],scratch[2],scratch[3]);

-         C_SUB( scratch[9],scratch[2],scratch[3]);

-         Fout0->r += scratch[7].r + scratch[8].r;

-         Fout0->i += scratch[7].i + scratch[8].i;

-         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);

-         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);

-         scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);

-         scratch[6].i =  S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);

-         C_SUB(*Fout1,scratch[5],scratch[6]);

-         C_ADD(*Fout4,scratch[5],scratch[6]);

-         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);

-         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);

-         scratch[12].r =  S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);

-         scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);

-         C_ADD(*Fout2,scratch[11],scratch[12]);

-         C_SUB(*Fout3,scratch[11],scratch[12]);

-         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;

-      }

-   }

-}

 #endif

@@ -678,53 +487,7 @@

    opus_fft_impl(st, fout);

-void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)

-{

-   int m2, m;

-   int p;

-   int L;

-   int fstride[MAXFACTORS];

-   int i;

-   int shift;

-   /* st->shift can be -1 */

-   shift = st->shift>0 ? st->shift : 0;

-   fstride[0] = 1;

-   L=0;

-   do {

-      p = st->factors[2*L];

-      m = st->factors[2*L+1];

-      fstride[L+1] = fstride[L]*p;

-      L++;

-   } while(m!=1);

-   m = st->factors[2*L-1];

-   for (i=L-1;i>=0;i--)

-   {

-      if (i!=0)

-         m2 = st->factors[2*i-1];

-      else

-         m2 = 1;

-      switch (st->factors[2*i])

-      {

-      case 2:

-         ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);

-         break;

-      case 4:

-         ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);

-         break;

-#ifndef RADIX_TWO_ONLY

-      case 3:

-         ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);

-         break;

-      case 5:

-         ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);

-         break;

-#endif

-      }

-      m = m2;

-   }

-}

 #ifdef TEST_UNIT_DFT_C

 void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)

@@ -733,6 +496,10 @@

    /* Bit-reverse the input */

    for (i=0;i<st->nfft;i++)

       fout[st->bitrev[i]] = fin[i];

-   opus_ifft_impl(st, fout);

+   for (i=0;i<st->nfft;i++)

+      fout[i].i = -fout[i].i;

+   opus_fft_impl(st, fout);

+   for (i=0;i<st->nfft;i++)

+      fout[i].i = -fout[i].i;

 #endif

--- a/celt/mdct.c

+++ b/celt/mdct.c

@@ -262,9 +262,10 @@

          kiss_fft_cpx yc;

          yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);

          yi =  -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);

-         /* works because the cos is nearly one */

-         yc.r = yr - S_MUL(yi,sine);

-         yc.i = yi + S_MUL(yr,sine);

+         /* Works because the cos is nearly one. We swap real and imag because we

+            use an FFT instead of an IFFT. */

+         yc.i = yr - S_MUL(yi,sine);

+         yc.r = yi + S_MUL(yr,sine);

          /* Storing the pre-rotation directly in the bitrev order. */

          yp[*bitrev++] = yc;

          xp1+=2*stride;

@@ -272,7 +273,7 @@

-   opus_ifft_impl(l->kfft[shift], f2);

+   opus_fft_impl(l->kfft[shift], f2);

    /* Post-rotate and de-shuffle from both ends of the buffer at once to make

       it in-place. */

@@ -286,15 +287,17 @@

          kiss_fft_scalar re, im, yr, yi;

          kiss_twiddle_scalar t0, t1;

-         re = f2[i].r;

-         im = f2[i].i;

+         /* We swap real and imag because we're using an FFT instead of an IFFT. */

+         re = f2[i].i;

+         im = f2[i].r;

          t0 = t[i<<shift];

          t1 = t[(N4-i)<<shift];

          /* We'd scale up by 2 here, but instead it's done when mixing the windows */

          yr = S_MUL(re,t0) - S_MUL(im,t1);

          yi = S_MUL(im,t0) + S_MUL(re,t1);

-         re = f2[N4-i-1].r;

-         im = f2[N4-i-1].i;

+         /* We swap real and imag because we're using an FFT instead of an IFFT. */

+         re = f2[N4-i-1].i;

+         im = f2[N4-i-1].r;

          /* works because the cos is nearly one */

          yp0[0] = -(yr - S_MUL(yi,sine));

          yp1[1] = yi + S_MUL(yr,sine);