shithub: opus

Download patch

ref: 25649c1569c3980cc9252f4ac57eb20d8e6792be
parent: 37d13ceebeb7d08f30cb2895f7c1f0ba04c703b0
author: Jean-Marc Valin <[email protected]>
date: Fri Feb 22 11:19:39 EST 2008

Fixed the FFT for higher precision

--- a/libcelt/_kiss_fft_guts.h
+++ b/libcelt/_kiss_fft_guts.h
@@ -45,10 +45,23 @@
  * */
 #ifdef FIXED_POINT
 #include "arch.h"
+
+#ifdef DOUBLE_PRECISION
+
+# define FRACBITS 31
+# define SAMPPROD celt_int64_t 
+#define SAMP_MAX 2147483647
+#define TRIG_UPSCALE 65536
+
+#else /* DOUBLE_PRECISION */
+
 # define FRACBITS 15
 # define SAMPPROD celt_int32_t 
 #define SAMP_MAX 32767
+#define TRIG_UPSCALE 1
 
+#endif /* !DOUBLE_PRECISION */
+
 #define SAMP_MIN -SAMP_MAX
 
 #if defined(CHECK_OVERFLOW)
@@ -59,7 +72,8 @@
 
 
 #   define smul(a,b) ( (SAMPPROD)(a)*(b) )
-#   define sround( x )  (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )
+#   define sround( x )  (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> FRACBITS )
+#   define sround4( x )  (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> (FRACBITS+2) )
 
 #   define S_MUL(a,b) sround( smul(a,b) )
 
@@ -71,8 +85,8 @@
           (m).i = sround( smul((a).i,(b).r) - smul((a).r,(b).i) ); }while(0)
 
 #   define C_MUL4(m,a,b) \
-               do{ (m).r = PSHR32( smul((a).r,(b).r) - smul((a).i,(b).i),17 ); \
-               (m).i = PSHR32( smul((a).r,(b).i) + smul((a).i,(b).r),17 ); }while(0)
+               do{ (m).r = sround4( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
+               (m).i = sround4( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)
 
 #   define DIVSCALAR(x,k) \
 	(x) = sround( smul(  x, SAMP_MAX/k ) )
@@ -173,8 +187,10 @@
 
 
 #ifdef FIXED_POINT
-#  define KISS_FFT_COS(phase)  floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
-#  define KISS_FFT_SIN(phase)  floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))
+/*#  define KISS_FFT_COS(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
+#  define KISS_FFT_SIN(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/
+#  define KISS_FFT_COS(phase)  SAMP_MAX*cos (phase)
+#  define KISS_FFT_SIN(phase)  SAMP_MAX*sin (phase)
 #  define HALF_OF(x) ((x)>>1)
 #elif defined(USE_SIMD)
 #  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
@@ -191,12 +207,12 @@
 		(x)->r = KISS_FFT_COS(phase);\
 		(x)->i = KISS_FFT_SIN(phase);\
 	}while(0)
+   
 #define  kf_cexp2(x,phase) \
-               do{ \
-               (x)->r = celt_cos_norm((phase));\
-               (x)->i = celt_cos_norm((phase)-32768);\
+   do{ \
+      (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\
+      (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\
 }while(0)
-
 
 /* a debugging function */
 #define pcpx(c)\
--- a/libcelt/kiss_fft.c
+++ b/libcelt/kiss_fft.c
@@ -48,6 +48,7 @@
       tw1 = st->twiddles;
       for(j=0;j<m;j++)
       {
+#if 0
              /* Almost the same as the code path below, except that we divide the input by two
          (while keeping the best accuracy possible) */
          celt_word32_t tr, ti;
@@ -58,6 +59,15 @@
          Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
          Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
          Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
+#else
+         Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1);
+         Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1);
+         kiss_fft_cpx t;
+         C_MUL (t,  *Fout2 , *tw1);
+         tw1 += fstride;
+         C_SUB( *Fout2 ,  *Fout , t );
+         C_ADDTO( *Fout ,  t );         
+#endif
          ++Fout2;
          ++Fout;
       }
@@ -121,14 +131,14 @@
          C_MUL4(scratch[1],Fout[m2] , *tw2 );
          C_MUL4(scratch[2],Fout[m3] , *tw3 );
              
-         Fout->r = PSHR16(Fout->r, 2);
-         Fout->i = PSHR16(Fout->i, 2);
+         Fout->r = PSHR(Fout->r, 2);
+         Fout->i = PSHR(Fout->i, 2);
          C_SUB( scratch[5] , *Fout, scratch[1] );
          C_ADDTO(*Fout, scratch[1]);
          C_ADD( scratch[3] , scratch[0] , scratch[2] );
          C_SUB( scratch[4] , scratch[0] , scratch[2] );
-         Fout[m2].r = PSHR16(Fout[m2].r, 2);
-         Fout[m2].i = PSHR16(Fout[m2].i, 2);
+         Fout[m2].r = PSHR(Fout[m2].r, 2);
+         Fout[m2].i = PSHR(Fout[m2].i, 2);
          C_SUB( Fout[m2], *Fout, scratch[3] );
          tw1 += fstride;
          tw2 += fstride*2;
@@ -620,7 +630,7 @@
     if (st) {
         int i;
         st->nfft=nfft;
-#ifdef FIXED_POINT
+#if defined(FIXED_POINT) && !defined(DOUBLE_PRECISION)
         for (i=0;i<nfft;++i) {
             celt_word32_t phase = -i;
             kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft));
--- a/libcelt/kiss_fft.h
+++ b/libcelt/kiss_fft.h
@@ -33,7 +33,11 @@
 
 #ifdef FIXED_POINT
 #include "arch.h"	
+#ifdef DOUBLE_PRECISION
+#  define kiss_fft_scalar celt_int32_t
+#else
 #  define kiss_fft_scalar celt_int16_t
+#endif
 #else
 # ifndef kiss_fft_scalar
 /*  default is float */
--- a/tests/dft-test.c
+++ b/tests/dft-test.c
@@ -58,6 +58,13 @@
         in[k].i = (rand() % 65536) - 32768;
     }
 
+#ifdef DOUBLE_PRECISION
+    for (k=0;k<nfft;++k) {
+       in[k].r *= 65536;
+       in[k].i *= 65536;
+    }
+#endif
+    
     if (isinverse)
     {
        for (k=0;k<nfft;++k) {