shithub: opus

Download patch

ref: d911bc4d0d671d9f18e254427059d6b9c7c074f9
parent: 9ced5d042d1f4167e2bcf181845fad6b0c4aceb4
author: Jean-Marc Valin <[email protected]>
date: Sun Feb 24 12:16:47 EST 2008

Added a mixed-precision version of the FFT with 32-bit data and 16-bit twiddles.

--- a/libcelt/_kiss_fft_guts.h
+++ b/libcelt/_kiss_fft_guts.h
@@ -51,8 +51,13 @@
 # define FRACBITS 31
 # define SAMPPROD celt_int64_t 
 #define SAMP_MAX 2147483647
+#ifdef MIXED_PRECISION
+#define TWID_MAX 32767
+#define TRIG_UPSCALE 1
+#else
 #define TRIG_UPSCALE 65536
-
+#define TWID_MAX 2147483647
+#endif
 #else /* DOUBLE_PRECISION */
 
 # define FRACBITS 15
@@ -70,9 +75,35 @@
 		fprintf(stderr,"WARNING:overflow @ " __FILE__ "(%d): (%d " #op" %d) = %ld\n",__LINE__,(a),(b),(SAMPPROD)(a) op (SAMPPROD)(b) );  }
 #endif
 
-
 #   define smul(a,b) ( (SAMPPROD)(a)*(b) )
 #   define sround( x )  (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> FRACBITS )
+
+#if MIXED_PRECISION
+
+#undef MULT16_32_Q15
+#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
+//#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
+#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
+
+#   define S_MUL(a,b) MULT16_32_Q15(b, a)
+
+#   define C_MUL(m,a,b) \
+      do{ (m).r = S_MUL((a).r,(b).r) - S_MUL((a).i,(b).i); \
+          (m).i = S_MUL((a).r,(b).i) + S_MUL((a).i,(b).r); }while(0)
+
+#   define C_MULC(m,a,b) \
+      do{ (m).r = S_MUL((a).r,(b).r) + S_MUL((a).i,(b).i); \
+          (m).i = S_MUL((a).i,(b).r) - S_MUL((a).r,(b).i); }while(0)
+
+#   define C_MUL4(m,a,b) \
+      do{ (m).r = SHR(S_MUL((a).r,(b).r) - S_MUL((a).i,(b).i),2); \
+          (m).i = SHR(S_MUL((a).r,(b).i) + S_MUL((a).i,(b).r),2); }while(0)
+
+#   define C_MULBYSCALAR( c, s ) \
+      do{ (c).r =  S_MUL( (c).r , s ) ;\
+          (c).i =  S_MUL( (c).i , s ) ; }while(0)
+
+#else /* MIXED_PRECISION */
 #   define sround4( x )  (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> (FRACBITS+2) )
 
 #   define S_MUL(a,b) sround( smul(a,b) )
@@ -88,6 +119,12 @@
                do{ (m).r = sround4( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
                (m).i = sround4( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)
 
+#   define C_MULBYSCALAR( c, s ) \
+               do{ (c).r =  sround( smul( (c).r , s ) ) ;\
+               (c).i =  sround( smul( (c).i , s ) ) ; }while(0)
+
+#endif /* !MIXED_PRECISION */
+
 #   define DIVSCALAR(x,k) \
 	(x) = sround( smul(  x, SAMP_MAX/k ) )
 
@@ -95,9 +132,6 @@
 	do {    DIVSCALAR( (c).r , div);  \
 		DIVSCALAR( (c).i  , div); }while (0)
 
-#   define C_MULBYSCALAR( c, s ) \
-    do{ (c).r =  sround( smul( (c).r , s ) ) ;\
-        (c).i =  sround( smul( (c).i , s ) ) ; }while(0)
 
                
 #define L1 32767
@@ -189,8 +223,8 @@
 #ifdef FIXED_POINT
 /*#  define KISS_FFT_COS(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
 #  define KISS_FFT_SIN(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/
-#  define KISS_FFT_COS(phase)  SAMP_MAX*cos (phase)
-#  define KISS_FFT_SIN(phase)  SAMP_MAX*sin (phase)
+#  define KISS_FFT_COS(phase)  floor(.5+TWID_MAX*cos (phase))
+#  define KISS_FFT_SIN(phase)  floor(.5+TWID_MAX*sin (phase))
 #  define HALF_OF(x) ((x)>>1)
 #elif defined(USE_SIMD)
 #  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
--- a/libcelt/kiss_fft.c
+++ b/libcelt/kiss_fft.c
@@ -48,18 +48,6 @@
       tw1 = st->twiddles;
       for(j=0;j<m;j++)
       {
-#if 0
-             /* Almost the same as the code path below, except that we divide the input by two
-         (while keeping the best accuracy possible) */
-         celt_word32_t tr, ti;
-         tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1);
-         ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1);
-         tw1 += fstride;
-         Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
-         Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
-         Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
-         Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
-#else
          kiss_fft_cpx t;
          Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1);
          Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1);
@@ -66,8 +54,7 @@
          C_MUL (t,  *Fout2 , *tw1);
          tw1 += fstride;
          C_SUB( *Fout2 ,  *Fout , t );
-         C_ADDTO( *Fout ,  t );         
-#endif
+         C_ADDTO( *Fout ,  t );
          ++Fout2;
          ++Fout;
       }
@@ -630,7 +617,7 @@
     if (st) {
         int i;
         st->nfft=nfft;
-#if defined(FIXED_POINT) && !defined(DOUBLE_PRECISION)
+#if defined(FIXED_POINT) && (!defined(DOUBLE_PRECISION) || defined(MIXED_PRECISION))
         for (i=0;i<nfft;++i) {
             celt_word32_t phase = -i;
             kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft));
--- a/tests/dft-test.c
+++ b/tests/dft-test.c
@@ -114,6 +114,10 @@
         test1d(120,1);
         test1d(105,0);
         test1d(105,1);
+        test1d(128,0);
+        test1d(128,1);
+        test1d(256,0);
+        test1d(256,1);
     }
     return ret;
 }