shithub: opus

--- a/libcelt/_kiss_fft_guts.h

+++ b/libcelt/_kiss_fft_guts.h

@@ -45,10 +45,23 @@

  * */

 #ifdef FIXED_POINT

 #include "arch.h"

+#ifdef DOUBLE_PRECISION

+# define FRACBITS 31

+# define SAMPPROD celt_int64_t

+#define SAMP_MAX 2147483647

+#define TRIG_UPSCALE 65536

+#else /* DOUBLE_PRECISION */

 # define FRACBITS 15

 # define SAMPPROD celt_int32_t

 #define SAMP_MAX 32767

+#define TRIG_UPSCALE 1

+#endif /* !DOUBLE_PRECISION */

 #define SAMP_MIN -SAMP_MAX

 #if defined(CHECK_OVERFLOW)

@@ -59,7 +72,8 @@

 #   define smul(a,b) ( (SAMPPROD)(a)*(b) )

-#   define sround( x )  (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )

+#   define sround( x )  (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> FRACBITS )

+#   define sround4( x )  (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> (FRACBITS+2) )

 #   define S_MUL(a,b) sround( smul(a,b) )

@@ -71,8 +85,8 @@

           (m).i = sround( smul((a).i,(b).r) - smul((a).r,(b).i) ); }while(0)

 #   define C_MUL4(m,a,b) \

-               do{ (m).r = PSHR32( smul((a).r,(b).r) - smul((a).i,(b).i),17 ); \

-               (m).i = PSHR32( smul((a).r,(b).i) + smul((a).i,(b).r),17 ); }while(0)

+               do{ (m).r = sround4( smul((a).r,(b).r) - smul((a).i,(b).i) ); \

+               (m).i = sround4( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)

 #   define DIVSCALAR(x,k) \

 	(x) = sround( smul(  x, SAMP_MAX/k ) )

@@ -173,8 +187,10 @@

 #ifdef FIXED_POINT

-#  define KISS_FFT_COS(phase)  floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))

-#  define KISS_FFT_SIN(phase)  floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))

+/*#  define KISS_FFT_COS(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))

+#  define KISS_FFT_SIN(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/

+#  define KISS_FFT_COS(phase)  SAMP_MAX*cos (phase)

+#  define KISS_FFT_SIN(phase)  SAMP_MAX*sin (phase)

 #  define HALF_OF(x) ((x)>>1)

 #elif defined(USE_SIMD)

 #  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )

@@ -191,12 +207,12 @@

 		(x)->r = KISS_FFT_COS(phase);\

 		(x)->i = KISS_FFT_SIN(phase);\

 	}while(0)

 #define  kf_cexp2(x,phase) \

-               do{ \

-               (x)->r = celt_cos_norm((phase));\

-               (x)->i = celt_cos_norm((phase)-32768);\

+   do{ \

+      (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\

+      (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\

 }while(0)

 /* a debugging function */

 #define pcpx(c)\

--- a/libcelt/kiss_fft.c

+++ b/libcelt/kiss_fft.c

@@ -48,6 +48,7 @@

       tw1 = st->twiddles;

       for(j=0;j<m;j++)

+#if 0

              /* Almost the same as the code path below, except that we divide the input by two

          (while keeping the best accuracy possible) */

          celt_word32_t tr, ti;

@@ -58,6 +59,15 @@

          Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15);

          Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15);

          Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15);

+#else

+         Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1);

+         Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1);

+         kiss_fft_cpx t;

+         C_MUL (t,  *Fout2 , *tw1);

+         tw1 += fstride;

+         C_SUB( *Fout2 ,  *Fout , t );

+         C_ADDTO( *Fout ,  t );

+#endif

          ++Fout2;

          ++Fout;

@@ -121,14 +131,14 @@

          C_MUL4(scratch[1],Fout[m2] , *tw2 );

          C_MUL4(scratch[2],Fout[m3] , *tw3 );

-         Fout->r = PSHR16(Fout->r, 2);

-         Fout->i = PSHR16(Fout->i, 2);

+         Fout->r = PSHR(Fout->r, 2);

+         Fout->i = PSHR(Fout->i, 2);

          C_SUB( scratch[5] , *Fout, scratch[1] );

          C_ADDTO(*Fout, scratch[1]);

          C_ADD( scratch[3] , scratch[0] , scratch[2] );

          C_SUB( scratch[4] , scratch[0] , scratch[2] );

-         Fout[m2].r = PSHR16(Fout[m2].r, 2);

-         Fout[m2].i = PSHR16(Fout[m2].i, 2);

+         Fout[m2].r = PSHR(Fout[m2].r, 2);

+         Fout[m2].i = PSHR(Fout[m2].i, 2);

          C_SUB( Fout[m2], *Fout, scratch[3] );

          tw1 += fstride;

          tw2 += fstride*2;

@@ -620,7 +630,7 @@

     if (st) {

         int i;

         st->nfft=nfft;

-#ifdef FIXED_POINT

+#if defined(FIXED_POINT) && !defined(DOUBLE_PRECISION)

         for (i=0;i<nfft;++i) {

             celt_word32_t phase = -i;

             kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft));

--- a/libcelt/kiss_fft.h

+++ b/libcelt/kiss_fft.h

@@ -33,7 +33,11 @@

 #ifdef FIXED_POINT

 #include "arch.h"

+#ifdef DOUBLE_PRECISION

+#  define kiss_fft_scalar celt_int32_t

+#else

 #  define kiss_fft_scalar celt_int16_t

+#endif

 #else

 # ifndef kiss_fft_scalar

 /*  default is float */

--- a/tests/dft-test.c

+++ b/tests/dft-test.c

@@ -58,6 +58,13 @@

         in[k].i = (rand() % 65536) - 32768;

+#ifdef DOUBLE_PRECISION

+    for (k=0;k<nfft;++k) {

+       in[k].r *= 65536;

+       in[k].i *= 65536;

+    }

+#endif

     if (isinverse)

        for (k=0;k<nfft;++k) {