shithub: opus

--- a/libcelt/_kiss_fft_guts.h

+++ b/libcelt/_kiss_fft_guts.h

@@ -51,8 +51,13 @@

 # define FRACBITS 31

 # define SAMPPROD celt_int64_t

 #define SAMP_MAX 2147483647

+#ifdef MIXED_PRECISION

+#define TWID_MAX 32767

+#define TRIG_UPSCALE 1

+#else

 #define TRIG_UPSCALE 65536

+#define TWID_MAX 2147483647

+#endif

 #else /* DOUBLE_PRECISION */

 # define FRACBITS 15

@@ -70,9 +75,35 @@

 		fprintf(stderr,"WARNING:overflow @ " __FILE__ "(%d): (%d " #op" %d) = %ld\n",__LINE__,(a),(b),(SAMPPROD)(a) op (SAMPPROD)(b) );  }

 #endif

 #   define smul(a,b) ( (SAMPPROD)(a)*(b) )

 #   define sround( x )  (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> FRACBITS )

+#if MIXED_PRECISION

+#undef MULT16_32_Q15

+#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))

+//#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))

+#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))

+#   define S_MUL(a,b) MULT16_32_Q15(b, a)

+#   define C_MUL(m,a,b) \

+      do{ (m).r = S_MUL((a).r,(b).r) - S_MUL((a).i,(b).i); \

+          (m).i = S_MUL((a).r,(b).i) + S_MUL((a).i,(b).r); }while(0)

+#   define C_MULC(m,a,b) \

+      do{ (m).r = S_MUL((a).r,(b).r) + S_MUL((a).i,(b).i); \

+          (m).i = S_MUL((a).i,(b).r) - S_MUL((a).r,(b).i); }while(0)

+#   define C_MUL4(m,a,b) \

+      do{ (m).r = SHR(S_MUL((a).r,(b).r) - S_MUL((a).i,(b).i),2); \

+          (m).i = SHR(S_MUL((a).r,(b).i) + S_MUL((a).i,(b).r),2); }while(0)

+#   define C_MULBYSCALAR( c, s ) \

+      do{ (c).r =  S_MUL( (c).r , s ) ;\

+          (c).i =  S_MUL( (c).i , s ) ; }while(0)

+#else /* MIXED_PRECISION */

 #   define sround4( x )  (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> (FRACBITS+2) )

 #   define S_MUL(a,b) sround( smul(a,b) )

@@ -88,6 +119,12 @@

                do{ (m).r = sround4( smul((a).r,(b).r) - smul((a).i,(b).i) ); \

                (m).i = sround4( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)

+#   define C_MULBYSCALAR( c, s ) \

+               do{ (c).r =  sround( smul( (c).r , s ) ) ;\

+               (c).i =  sround( smul( (c).i , s ) ) ; }while(0)

+#endif /* !MIXED_PRECISION */

 #   define DIVSCALAR(x,k) \

 	(x) = sround( smul(  x, SAMP_MAX/k ) )

@@ -95,9 +132,6 @@

 	do {    DIVSCALAR( (c).r , div);  \

 		DIVSCALAR( (c).i  , div); }while (0)

-#   define C_MULBYSCALAR( c, s ) \

-    do{ (c).r =  sround( smul( (c).r , s ) ) ;\

-        (c).i =  sround( smul( (c).i , s ) ) ; }while(0)

 #define L1 32767

@@ -189,8 +223,8 @@

 #ifdef FIXED_POINT

 /*#  define KISS_FFT_COS(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))

 #  define KISS_FFT_SIN(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/

-#  define KISS_FFT_COS(phase)  SAMP_MAX*cos (phase)

-#  define KISS_FFT_SIN(phase)  SAMP_MAX*sin (phase)

+#  define KISS_FFT_COS(phase)  floor(.5+TWID_MAX*cos (phase))

+#  define KISS_FFT_SIN(phase)  floor(.5+TWID_MAX*sin (phase))

 #  define HALF_OF(x) ((x)>>1)

 #elif defined(USE_SIMD)

 #  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )

--- a/libcelt/kiss_fft.c

+++ b/libcelt/kiss_fft.c

@@ -48,18 +48,6 @@

       tw1 = st->twiddles;

       for(j=0;j<m;j++)

-#if 0

-             /* Almost the same as the code path below, except that we divide the input by two

-         (while keeping the best accuracy possible) */

-         celt_word32_t tr, ti;

-         tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1);

-         ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1);

-         tw1 += fstride;

-         Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15);

-         Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15);

-         Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15);

-         Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15);

-#else

          kiss_fft_cpx t;

          Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1);

          Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1);

@@ -66,8 +54,7 @@

          C_MUL (t,  *Fout2 , *tw1);

          tw1 += fstride;

          C_SUB( *Fout2 ,  *Fout , t );

-         C_ADDTO( *Fout ,  t );

-#endif

+         C_ADDTO( *Fout ,  t );

          ++Fout2;

          ++Fout;

@@ -630,7 +617,7 @@

     if (st) {

         int i;

         st->nfft=nfft;

-#if defined(FIXED_POINT) && !defined(DOUBLE_PRECISION)

+#if defined(FIXED_POINT) && (!defined(DOUBLE_PRECISION) || defined(MIXED_PRECISION))

         for (i=0;i<nfft;++i) {

             celt_word32_t phase = -i;

             kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft));

--- a/tests/dft-test.c

+++ b/tests/dft-test.c

@@ -114,6 +114,10 @@

         test1d(120,1);

         test1d(105,0);

         test1d(105,1);

+        test1d(128,0);

+        test1d(128,1);

+        test1d(256,0);

+        test1d(256,1);

     return ret;