ref: 153def288491a4e5ecc4939bdd438006b2eda7b3
parent: 99968abba8f6d488e8ea331b5c9e31aa61212f93
author: Jean-Marc Valin <[email protected]>
date: Sat Dec 21 10:45:17 EST 2013
Getting rid of the inverse FFT entirely IMDCT now uses the forward FFT.
--- a/celt/kiss_fft.c
+++ b/celt/kiss_fft.c
@@ -76,37 +76,6 @@
}
}
-static void ki_bfly2(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_state *st,
- int m,
- int N,
- int mm
- )
-{
- kiss_fft_cpx * Fout2;
- const kiss_twiddle_cpx * tw1;
- kiss_fft_cpx t;
- int i,j;
- kiss_fft_cpx * Fout_beg = Fout;
- for (i=0;i<N;i++)
- {
- Fout = Fout_beg + i*mm;
- Fout2 = Fout + m;
- tw1 = st->twiddles;
- for(j=0;j<m;j++)
- {
- C_MULC (t, *Fout2 , *tw1);
- tw1 += fstride;
- C_SUB( *Fout2 , *Fout , t );
- C_ADDTO( *Fout , t );
- ++Fout2;
- ++Fout;
- }
- }
-}
-
static void kf_bfly4(
kiss_fft_cpx * Fout,
const size_t fstride,
@@ -152,51 +121,7 @@
}
}
-static void ki_bfly4(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_state *st,
- int m,
- int N,
- int mm
- )
-{
- const kiss_twiddle_cpx *tw1,*tw2,*tw3;
- kiss_fft_cpx scratch[6];
- const size_t m2=2*m;
- const size_t m3=3*m;
- int i, j;
- kiss_fft_cpx * Fout_beg = Fout;
- for (i=0;i<N;i++)
- {
- Fout = Fout_beg + i*mm;
- tw3 = tw2 = tw1 = st->twiddles;
- for (j=0;j<m;j++)
- {
- C_MULC(scratch[0],Fout[m] , *tw1 );
- C_MULC(scratch[1],Fout[m2] , *tw2 );
- C_MULC(scratch[2],Fout[m3] , *tw3 );
-
- C_SUB( scratch[5] , *Fout, scratch[1] );
- C_ADDTO(*Fout, scratch[1]);
- C_ADD( scratch[3] , scratch[0] , scratch[2] );
- C_SUB( scratch[4] , scratch[0] , scratch[2] );
- C_SUB( Fout[m2], *Fout, scratch[3] );
- tw1 += fstride;
- tw2 += fstride*2;
- tw3 += fstride*3;
- C_ADDTO( *Fout , scratch[3] );
-
- Fout[m].r = scratch[5].r - scratch[4].i;
- Fout[m].i = scratch[5].i + scratch[4].r;
- Fout[m3].r = scratch[5].r + scratch[4].i;
- Fout[m3].i = scratch[5].i - scratch[4].r;
- ++Fout;
- }
- }
-}
-
#ifndef RADIX_TWO_ONLY
static void kf_bfly3(
@@ -250,56 +175,7 @@
}
}
-static void ki_bfly3(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_state *st,
- int m,
- int N,
- int mm
- )
-{
- int i, k;
- const size_t m2 = 2*m;
- const kiss_twiddle_cpx *tw1,*tw2;
- kiss_fft_cpx scratch[5];
- kiss_twiddle_cpx epi3;
- kiss_fft_cpx * Fout_beg = Fout;
- epi3 = st->twiddles[fstride*m];
- for (i=0;i<N;i++)
- {
- Fout = Fout_beg + i*mm;
- tw1=tw2=st->twiddles;
- k=m;
- do{
-
- C_MULC(scratch[1],Fout[m] , *tw1);
- C_MULC(scratch[2],Fout[m2] , *tw2);
-
- C_ADD(scratch[3],scratch[1],scratch[2]);
- C_SUB(scratch[0],scratch[1],scratch[2]);
- tw1 += fstride;
- tw2 += fstride*2;
-
- Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
- Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
-
- C_MULBYSCALAR( scratch[0] , -epi3.i );
-
- C_ADDTO(*Fout,scratch[3]);
-
- Fout[m2].r = Fout[m].r + scratch[0].i;
- Fout[m2].i = Fout[m].i - scratch[0].r;
-
- Fout[m].r -= scratch[0].i;
- Fout[m].i += scratch[0].r;
-
- ++Fout;
- }while(--k);
- }
-}
-
static void kf_bfly5(
kiss_fft_cpx * Fout,
const size_t fstride,
@@ -368,74 +244,7 @@
}
}
-static void ki_bfly5(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_state *st,
- int m,
- int N,
- int mm
- )
-{
- kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
- int i, u;
- kiss_fft_cpx scratch[13];
- const kiss_twiddle_cpx * twiddles = st->twiddles;
- const kiss_twiddle_cpx *tw;
- kiss_twiddle_cpx ya,yb;
- kiss_fft_cpx * Fout_beg = Fout;
- ya = twiddles[fstride*m];
- yb = twiddles[fstride*2*m];
- tw=st->twiddles;
-
- for (i=0;i<N;i++)
- {
- Fout = Fout_beg + i*mm;
- Fout0=Fout;
- Fout1=Fout0+m;
- Fout2=Fout0+2*m;
- Fout3=Fout0+3*m;
- Fout4=Fout0+4*m;
-
- for ( u=0; u<m; ++u ) {
- scratch[0] = *Fout0;
-
- C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
- C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
- C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
- C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
-
- C_ADD( scratch[7],scratch[1],scratch[4]);
- C_SUB( scratch[10],scratch[1],scratch[4]);
- C_ADD( scratch[8],scratch[2],scratch[3]);
- C_SUB( scratch[9],scratch[2],scratch[3]);
-
- Fout0->r += scratch[7].r + scratch[8].r;
- Fout0->i += scratch[7].i + scratch[8].i;
-
- scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
- scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
-
- scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
- scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
-
- C_SUB(*Fout1,scratch[5],scratch[6]);
- C_ADD(*Fout4,scratch[5],scratch[6]);
-
- scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
- scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
- scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
- scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
-
- C_ADD(*Fout2,scratch[11],scratch[12]);
- C_SUB(*Fout3,scratch[11],scratch[12]);
-
- ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
- }
- }
-}
-
#endif
@@ -678,53 +487,7 @@
opus_fft_impl(st, fout);
}
-void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
-{
- int m2, m;
- int p;
- int L;
- int fstride[MAXFACTORS];
- int i;
- int shift;
- /* st->shift can be -1 */
- shift = st->shift>0 ? st->shift : 0;
- fstride[0] = 1;
- L=0;
- do {
- p = st->factors[2*L];
- m = st->factors[2*L+1];
- fstride[L+1] = fstride[L]*p;
- L++;
- } while(m!=1);
- m = st->factors[2*L-1];
- for (i=L-1;i>=0;i--)
- {
- if (i!=0)
- m2 = st->factors[2*i-1];
- else
- m2 = 1;
- switch (st->factors[2*i])
- {
- case 2:
- ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
- break;
- case 4:
- ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
- break;
-#ifndef RADIX_TWO_ONLY
- case 3:
- ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
- break;
- case 5:
- ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
- break;
-#endif
- }
- m = m2;
- }
-}
-
#ifdef TEST_UNIT_DFT_C
void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
@@ -733,6 +496,10 @@
/* Bit-reverse the input */
for (i=0;i<st->nfft;i++)
fout[st->bitrev[i]] = fin[i];
- opus_ifft_impl(st, fout);
+ for (i=0;i<st->nfft;i++)
+ fout[i].i = -fout[i].i;
+ opus_fft_impl(st, fout);
+ for (i=0;i<st->nfft;i++)
+ fout[i].i = -fout[i].i;
}
#endif
--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -262,9 +262,10 @@
kiss_fft_cpx yc;
yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
- /* works because the cos is nearly one */
- yc.r = yr - S_MUL(yi,sine);
- yc.i = yi + S_MUL(yr,sine);
+ /* Works because the cos is nearly one. We swap real and imag because we
+ use an FFT instead of an IFFT. */
+ yc.i = yr - S_MUL(yi,sine);
+ yc.r = yi + S_MUL(yr,sine);
/* Storing the pre-rotation directly in the bitrev order. */
yp[*bitrev++] = yc;
xp1+=2*stride;
@@ -272,7 +273,7 @@
}
}
- opus_ifft_impl(l->kfft[shift], f2);
+ opus_fft_impl(l->kfft[shift], f2);
/* Post-rotate and de-shuffle from both ends of the buffer at once to make
it in-place. */
@@ -286,15 +287,17 @@
{
kiss_fft_scalar re, im, yr, yi;
kiss_twiddle_scalar t0, t1;
- re = f2[i].r;
- im = f2[i].i;
+ /* We swap real and imag because we're using an FFT instead of an IFFT. */
+ re = f2[i].i;
+ im = f2[i].r;
t0 = t[i<<shift];
t1 = t[(N4-i)<<shift];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) - S_MUL(im,t1);
yi = S_MUL(im,t0) + S_MUL(re,t1);
- re = f2[N4-i-1].r;
- im = f2[N4-i-1].i;
+ /* We swap real and imag because we're using an FFT instead of an IFFT. */
+ re = f2[N4-i-1].i;
+ im = f2[N4-i-1].r;
/* works because the cos is nearly one */
yp0[0] = -(yr - S_MUL(yi,sine));
yp1[1] = yi + S_MUL(yr,sine);