shithub: opus

Download patch

ref: 153def288491a4e5ecc4939bdd438006b2eda7b3
parent: 99968abba8f6d488e8ea331b5c9e31aa61212f93
author: Jean-Marc Valin <[email protected]>
date: Sat Dec 21 10:45:17 EST 2013

Getting rid of the inverse FFT entirely

IMDCT now uses the forward FFT.

--- a/celt/kiss_fft.c
+++ b/celt/kiss_fft.c
@@ -76,37 +76,6 @@
    }
 }
 
-static void ki_bfly2(
-                     kiss_fft_cpx * Fout,
-                     const size_t fstride,
-                     const kiss_fft_state *st,
-                     int m,
-                     int N,
-                     int mm
-                    )
-{
-   kiss_fft_cpx * Fout2;
-   const kiss_twiddle_cpx * tw1;
-   kiss_fft_cpx t;
-   int i,j;
-   kiss_fft_cpx * Fout_beg = Fout;
-   for (i=0;i<N;i++)
-   {
-      Fout = Fout_beg + i*mm;
-      Fout2 = Fout + m;
-      tw1 = st->twiddles;
-      for(j=0;j<m;j++)
-      {
-         C_MULC (t,  *Fout2 , *tw1);
-         tw1 += fstride;
-         C_SUB( *Fout2 ,  *Fout , t );
-         C_ADDTO( *Fout ,  t );
-         ++Fout2;
-         ++Fout;
-      }
-   }
-}
-
 static void kf_bfly4(
                      kiss_fft_cpx * Fout,
                      const size_t fstride,
@@ -152,51 +121,7 @@
    }
 }
 
-static void ki_bfly4(
-                     kiss_fft_cpx * Fout,
-                     const size_t fstride,
-                     const kiss_fft_state *st,
-                     int m,
-                     int N,
-                     int mm
-                    )
-{
-   const kiss_twiddle_cpx *tw1,*tw2,*tw3;
-   kiss_fft_cpx scratch[6];
-   const size_t m2=2*m;
-   const size_t m3=3*m;
-   int i, j;
 
-   kiss_fft_cpx * Fout_beg = Fout;
-   for (i=0;i<N;i++)
-   {
-      Fout = Fout_beg + i*mm;
-      tw3 = tw2 = tw1 = st->twiddles;
-      for (j=0;j<m;j++)
-      {
-         C_MULC(scratch[0],Fout[m] , *tw1 );
-         C_MULC(scratch[1],Fout[m2] , *tw2 );
-         C_MULC(scratch[2],Fout[m3] , *tw3 );
-
-         C_SUB( scratch[5] , *Fout, scratch[1] );
-         C_ADDTO(*Fout, scratch[1]);
-         C_ADD( scratch[3] , scratch[0] , scratch[2] );
-         C_SUB( scratch[4] , scratch[0] , scratch[2] );
-         C_SUB( Fout[m2], *Fout, scratch[3] );
-         tw1 += fstride;
-         tw2 += fstride*2;
-         tw3 += fstride*3;
-         C_ADDTO( *Fout , scratch[3] );
-
-         Fout[m].r = scratch[5].r - scratch[4].i;
-         Fout[m].i = scratch[5].i + scratch[4].r;
-         Fout[m3].r = scratch[5].r + scratch[4].i;
-         Fout[m3].i = scratch[5].i - scratch[4].r;
-         ++Fout;
-      }
-   }
-}
-
 #ifndef RADIX_TWO_ONLY
 
 static void kf_bfly3(
@@ -250,56 +175,7 @@
    }
 }
 
-static void ki_bfly3(
-                     kiss_fft_cpx * Fout,
-                     const size_t fstride,
-                     const kiss_fft_state *st,
-                     int m,
-                     int N,
-                     int mm
-                    )
-{
-   int i, k;
-   const size_t m2 = 2*m;
-   const kiss_twiddle_cpx *tw1,*tw2;
-   kiss_fft_cpx scratch[5];
-   kiss_twiddle_cpx epi3;
 
-   kiss_fft_cpx * Fout_beg = Fout;
-   epi3 = st->twiddles[fstride*m];
-   for (i=0;i<N;i++)
-   {
-      Fout = Fout_beg + i*mm;
-      tw1=tw2=st->twiddles;
-      k=m;
-      do{
-
-         C_MULC(scratch[1],Fout[m] , *tw1);
-         C_MULC(scratch[2],Fout[m2] , *tw2);
-
-         C_ADD(scratch[3],scratch[1],scratch[2]);
-         C_SUB(scratch[0],scratch[1],scratch[2]);
-         tw1 += fstride;
-         tw2 += fstride*2;
-
-         Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
-         Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
-
-         C_MULBYSCALAR( scratch[0] , -epi3.i );
-
-         C_ADDTO(*Fout,scratch[3]);
-
-         Fout[m2].r = Fout[m].r + scratch[0].i;
-         Fout[m2].i = Fout[m].i - scratch[0].r;
-
-         Fout[m].r -= scratch[0].i;
-         Fout[m].i += scratch[0].r;
-
-         ++Fout;
-      }while(--k);
-   }
-}
-
 static void kf_bfly5(
                      kiss_fft_cpx * Fout,
                      const size_t fstride,
@@ -368,74 +244,7 @@
    }
 }
 
-static void ki_bfly5(
-                     kiss_fft_cpx * Fout,
-                     const size_t fstride,
-                     const kiss_fft_state *st,
-                     int m,
-                     int N,
-                     int mm
-                    )
-{
-   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
-   int i, u;
-   kiss_fft_cpx scratch[13];
-   const kiss_twiddle_cpx * twiddles = st->twiddles;
-   const kiss_twiddle_cpx *tw;
-   kiss_twiddle_cpx ya,yb;
-   kiss_fft_cpx * Fout_beg = Fout;
 
-   ya = twiddles[fstride*m];
-   yb = twiddles[fstride*2*m];
-   tw=st->twiddles;
-
-   for (i=0;i<N;i++)
-   {
-      Fout = Fout_beg + i*mm;
-      Fout0=Fout;
-      Fout1=Fout0+m;
-      Fout2=Fout0+2*m;
-      Fout3=Fout0+3*m;
-      Fout4=Fout0+4*m;
-
-      for ( u=0; u<m; ++u ) {
-         scratch[0] = *Fout0;
-
-         C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
-         C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
-         C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
-         C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
-
-         C_ADD( scratch[7],scratch[1],scratch[4]);
-         C_SUB( scratch[10],scratch[1],scratch[4]);
-         C_ADD( scratch[8],scratch[2],scratch[3]);
-         C_SUB( scratch[9],scratch[2],scratch[3]);
-
-         Fout0->r += scratch[7].r + scratch[8].r;
-         Fout0->i += scratch[7].i + scratch[8].i;
-
-         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
-         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
-
-         scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
-         scratch[6].i =  S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
-
-         C_SUB(*Fout1,scratch[5],scratch[6]);
-         C_ADD(*Fout4,scratch[5],scratch[6]);
-
-         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
-         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
-         scratch[12].r =  S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
-         scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
-
-         C_ADD(*Fout2,scratch[11],scratch[12]);
-         C_SUB(*Fout3,scratch[11],scratch[12]);
-
-         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
-      }
-   }
-}
-
 #endif
 
 
@@ -678,53 +487,7 @@
    opus_fft_impl(st, fout);
 }
 
-void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
-{
-   int m2, m;
-   int p;
-   int L;
-   int fstride[MAXFACTORS];
-   int i;
-   int shift;
 
-   /* st->shift can be -1 */
-   shift = st->shift>0 ? st->shift : 0;
-   fstride[0] = 1;
-   L=0;
-   do {
-      p = st->factors[2*L];
-      m = st->factors[2*L+1];
-      fstride[L+1] = fstride[L]*p;
-      L++;
-   } while(m!=1);
-   m = st->factors[2*L-1];
-   for (i=L-1;i>=0;i--)
-   {
-      if (i!=0)
-         m2 = st->factors[2*i-1];
-      else
-         m2 = 1;
-      switch (st->factors[2*i])
-      {
-      case 2:
-         ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
-         break;
-      case 4:
-         ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
-         break;
-#ifndef RADIX_TWO_ONLY
-      case 3:
-         ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
-         break;
-      case 5:
-         ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
-         break;
-#endif
-      }
-      m = m2;
-   }
-}
-
 #ifdef TEST_UNIT_DFT_C
 void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
 {
@@ -733,6 +496,10 @@
    /* Bit-reverse the input */
    for (i=0;i<st->nfft;i++)
       fout[st->bitrev[i]] = fin[i];
-   opus_ifft_impl(st, fout);
+   for (i=0;i<st->nfft;i++)
+      fout[i].i = -fout[i].i;
+   opus_fft_impl(st, fout);
+   for (i=0;i<st->nfft;i++)
+      fout[i].i = -fout[i].i;
 }
 #endif
--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -262,9 +262,10 @@
          kiss_fft_cpx yc;
          yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
          yi =  -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
-         /* works because the cos is nearly one */
-         yc.r = yr - S_MUL(yi,sine);
-         yc.i = yi + S_MUL(yr,sine);
+         /* Works because the cos is nearly one. We swap real and imag because we
+            use an FFT instead of an IFFT. */
+         yc.i = yr - S_MUL(yi,sine);
+         yc.r = yi + S_MUL(yr,sine);
          /* Storing the pre-rotation directly in the bitrev order. */
          yp[*bitrev++] = yc;
          xp1+=2*stride;
@@ -272,7 +273,7 @@
       }
    }
 
-   opus_ifft_impl(l->kfft[shift], f2);
+   opus_fft_impl(l->kfft[shift], f2);
 
    /* Post-rotate and de-shuffle from both ends of the buffer at once to make
       it in-place. */
@@ -286,15 +287,17 @@
       {
          kiss_fft_scalar re, im, yr, yi;
          kiss_twiddle_scalar t0, t1;
-         re = f2[i].r;
-         im = f2[i].i;
+         /* We swap real and imag because we're using an FFT instead of an IFFT. */
+         re = f2[i].i;
+         im = f2[i].r;
          t0 = t[i<<shift];
          t1 = t[(N4-i)<<shift];
          /* We'd scale up by 2 here, but instead it's done when mixing the windows */
          yr = S_MUL(re,t0) - S_MUL(im,t1);
          yi = S_MUL(im,t0) + S_MUL(re,t1);
-         re = f2[N4-i-1].r;
-         im = f2[N4-i-1].i;
+         /* We swap real and imag because we're using an FFT instead of an IFFT. */
+         re = f2[N4-i-1].i;
+         im = f2[N4-i-1].r;
          /* works because the cos is nearly one */
          yp0[0] = -(yr - S_MUL(yi,sine));
          yp1[1] = yi + S_MUL(yr,sine);