ref: bc13bbaad7deb1c81dfab7a7c0d0bd3bbd9884ef
parent: 2e26b82ec24060f61bc553d1f36efad49d97167e
author: Jean-Marc Valin <[email protected]>
date: Fri Dec 20 21:33:22 EST 2013
Applying the forward FFT gain up-front for fixed-point too. This makes us lose a bit of precision in the first steps, but our gain is more precise because it's only rounded once. Overall, SNR is slightly improved.
--- a/celt/_kiss_fft_guts.h
+++ b/celt/_kiss_fft_guts.h
@@ -65,10 +65,6 @@
do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
-# define C_MUL4(m,a,b) \
- do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \
- (m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0)
-
# define C_MULBYSCALAR( c, s ) \
do{ (c).r = S_MUL( (c).r , s ) ;\
(c).i = S_MUL( (c).i , s ) ; }while(0)
--- a/celt/kiss_fft.c
+++ b/celt/kiss_fft.c
@@ -66,8 +66,6 @@
for(j=0;j<m;j++)
{
kiss_fft_cpx t;
- Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);
- Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);
C_MUL (t, *Fout2 , *tw1);
tw1 += fstride;
C_SUB( *Fout2 , *Fout , t );
@@ -131,12 +129,10 @@
tw3 = tw2 = tw1 = st->twiddles;
for (j=0;j<m;j++)
{
- C_MUL4(scratch[0],Fout[m] , *tw1 );
- C_MUL4(scratch[1],Fout[m2] , *tw2 );
- C_MUL4(scratch[2],Fout[m3] , *tw3 );
+ C_MUL(scratch[0],Fout[m] , *tw1 );
+ C_MUL(scratch[1],Fout[m2] , *tw2 );
+ C_MUL(scratch[2],Fout[m3] , *tw3 );
- Fout->r = PSHR32(Fout->r, 2);
- Fout->i = PSHR32(Fout->i, 2);
C_SUB( scratch[5] , *Fout, scratch[1] );
C_ADDTO(*Fout, scratch[1]);
C_ADD( scratch[3] , scratch[0] , scratch[2] );
@@ -227,7 +223,6 @@
tw1=tw2=st->twiddles;
k=m;
do {
- C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
C_MUL(scratch[1],Fout[m] , *tw1);
C_MUL(scratch[2],Fout[m2] , *tw2);
@@ -336,7 +331,6 @@
Fout4=Fout0+4*m;
for ( u=0; u<m; ++u ) {
- C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
scratch[0] = *Fout0;
C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
@@ -614,6 +608,16 @@
int fstride[MAXFACTORS];
int i;
int shift;
+#ifdef FIXED_POINT
+ /* FIXME: This should eventually just go in the state. */
+ opus_val16 scale;
+ int scale_shift;
+ scale_shift = celt_ilog2(st->nfft);
+ if (st->nfft == 1<<scale_shift)
+ scale = Q15ONE;
+ else
+ scale = (1073741824+st->nfft/2)/st->nfft>>(15-scale_shift);
+#endif
/* st->shift can be -1 */
shift = st->shift>0 ? st->shift : 0;
@@ -622,10 +626,13 @@
/* Bit-reverse the input */
for (i=0;i<st->nfft;i++)
{
- fout[st->bitrev[i]] = fin[i];
-#ifndef FIXED_POINT
- fout[st->bitrev[i]].r *= st->scale;
- fout[st->bitrev[i]].i *= st->scale;
+ kiss_fft_cpx x = fin[i];
+#ifdef FIXED_POINT
+ fout[st->bitrev[i]].r = SHR32(MULT16_32_Q15(scale, x.r), scale_shift);
+ fout[st->bitrev[i]].i = SHR32(MULT16_32_Q15(scale, x.i), scale_shift);
+#else
+ fout[st->bitrev[i]].r = st->scale*x.r;
+ fout[st->bitrev[i]].i = st->scale*x.i;
#endif
}