ref: d911bc4d0d671d9f18e254427059d6b9c7c074f9
parent: 9ced5d042d1f4167e2bcf181845fad6b0c4aceb4
author: Jean-Marc Valin <[email protected]>
date: Sun Feb 24 12:16:47 EST 2008
Added a mixed-precision version of the FFT with 32-bit data and 16-bit twiddles.
--- a/libcelt/_kiss_fft_guts.h
+++ b/libcelt/_kiss_fft_guts.h
@@ -51,8 +51,13 @@
# define FRACBITS 31
# define SAMPPROD celt_int64_t
#define SAMP_MAX 2147483647
+#ifdef MIXED_PRECISION
+#define TWID_MAX 32767
+#define TRIG_UPSCALE 1
+#else
#define TRIG_UPSCALE 65536
-
+#define TWID_MAX 2147483647
+#endif
#else /* DOUBLE_PRECISION */
# define FRACBITS 15
@@ -70,9 +75,35 @@
fprintf(stderr,"WARNING:overflow @ " __FILE__ "(%d): (%d " #op" %d) = %ld\n",__LINE__,(a),(b),(SAMPPROD)(a) op (SAMPPROD)(b) ); }
#endif
-
# define smul(a,b) ( (SAMPPROD)(a)*(b) )
# define sround( x ) (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> FRACBITS )
+
+#if MIXED_PRECISION
+
+#undef MULT16_32_Q15
+#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
+//#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
+#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
+
+# define S_MUL(a,b) MULT16_32_Q15(b, a)
+
+# define C_MUL(m,a,b) \
+ do{ (m).r = S_MUL((a).r,(b).r) - S_MUL((a).i,(b).i); \
+ (m).i = S_MUL((a).r,(b).i) + S_MUL((a).i,(b).r); }while(0)
+
+# define C_MULC(m,a,b) \
+ do{ (m).r = S_MUL((a).r,(b).r) + S_MUL((a).i,(b).i); \
+ (m).i = S_MUL((a).i,(b).r) - S_MUL((a).r,(b).i); }while(0)
+
+# define C_MUL4(m,a,b) \
+ do{ (m).r = SHR(S_MUL((a).r,(b).r) - S_MUL((a).i,(b).i),2); \
+ (m).i = SHR(S_MUL((a).r,(b).i) + S_MUL((a).i,(b).r),2); }while(0)
+
+# define C_MULBYSCALAR( c, s ) \
+ do{ (c).r = S_MUL( (c).r , s ) ;\
+ (c).i = S_MUL( (c).i , s ) ; }while(0)
+
+#else /* MIXED_PRECISION */
# define sround4( x ) (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> (FRACBITS+2) )
# define S_MUL(a,b) sround( smul(a,b) )
@@ -88,6 +119,12 @@
do{ (m).r = sround4( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
(m).i = sround4( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)
+# define C_MULBYSCALAR( c, s ) \
+ do{ (c).r = sround( smul( (c).r , s ) ) ;\
+ (c).i = sround( smul( (c).i , s ) ) ; }while(0)
+
+#endif /* !MIXED_PRECISION */
+
# define DIVSCALAR(x,k) \
(x) = sround( smul( x, SAMP_MAX/k ) )
@@ -95,9 +132,6 @@
do { DIVSCALAR( (c).r , div); \
DIVSCALAR( (c).i , div); }while (0)
-# define C_MULBYSCALAR( c, s ) \
- do{ (c).r = sround( smul( (c).r , s ) ) ;\
- (c).i = sround( smul( (c).i , s ) ) ; }while(0)
#define L1 32767
@@ -189,8 +223,8 @@
#ifdef FIXED_POINT
/*# define KISS_FFT_COS(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
# define KISS_FFT_SIN(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/
-# define KISS_FFT_COS(phase) SAMP_MAX*cos (phase)
-# define KISS_FFT_SIN(phase) SAMP_MAX*sin (phase)
+# define KISS_FFT_COS(phase) floor(.5+TWID_MAX*cos (phase))
+# define KISS_FFT_SIN(phase) floor(.5+TWID_MAX*sin (phase))
# define HALF_OF(x) ((x)>>1)
#elif defined(USE_SIMD)
# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
--- a/libcelt/kiss_fft.c
+++ b/libcelt/kiss_fft.c
@@ -48,18 +48,6 @@
tw1 = st->twiddles;
for(j=0;j<m;j++)
{
-#if 0
- /* Almost the same as the code path below, except that we divide the input by two
- (while keeping the best accuracy possible) */
- celt_word32_t tr, ti;
- tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1);
- ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1);
- tw1 += fstride;
- Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
- Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
- Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
- Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
-#else
kiss_fft_cpx t;
Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1);
Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1);
@@ -66,8 +54,7 @@
C_MUL (t, *Fout2 , *tw1);
tw1 += fstride;
C_SUB( *Fout2 , *Fout , t );
- C_ADDTO( *Fout , t );
-#endif
+ C_ADDTO( *Fout , t );
++Fout2;
++Fout;
}
@@ -630,7 +617,7 @@
if (st) {
int i;
st->nfft=nfft;
-#if defined(FIXED_POINT) && !defined(DOUBLE_PRECISION)
+#if defined(FIXED_POINT) && (!defined(DOUBLE_PRECISION) || defined(MIXED_PRECISION))
for (i=0;i<nfft;++i) {
celt_word32_t phase = -i;
kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft));
--- a/tests/dft-test.c
+++ b/tests/dft-test.c
@@ -114,6 +114,10 @@
test1d(120,1);
test1d(105,0);
test1d(105,1);
+ test1d(128,0);
+ test1d(128,1);
+ test1d(256,0);
+ test1d(256,1);
}
return ret;
}