ref: cc344fb8ff9649a13d7628c38137f2f65cd65ec4
parent: e0c00e27d8decde15560b35d1ec5139fceb53a81
author: Jean-Marc Valin <[email protected]>
date: Sat Dec 28 14:10:44 EST 2013
Slightly improving the accuracy of the fixed-point MDCT downscale. Also simplifying the code.
--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -119,9 +119,9 @@
VARDECL(kiss_fft_cpx, f2);
const kiss_fft_state *st = l->kfft[shift];
const kiss_twiddle_scalar *trig;
+ opus_val16 scale;
#ifdef FIXED_POINT
/* FIXME: This should eventually just go in the state. */
- opus_val16 scale;
int scale_shift;
scale_shift = celt_ilog2(st->nfft);
if (st->nfft == 1<<scale_shift)
@@ -128,6 +128,11 @@
scale = Q15ONE;
else
scale = (1073741824+st->nfft/2)/st->nfft>>(15-scale_shift);
+ /* Allows us to scale with MULT16_32_Q16(), which is faster than
+ MULT16_32_Q15() on ARM. */
+ scale_shift--;
+#else
+ scale = st->scale;
#endif
SAVE_STACK;
@@ -195,13 +200,6 @@
kiss_fft_scalar re, im, yr, yi;
t0 = t[i];
t1 = t[N4+i];
-#ifdef FIXED_POINT
- t0 = MULT16_16_P15(t0, scale);
- t1 = MULT16_16_P15(t1, scale);
-#else
- t0 *= st->scale;
- t1 *= st->scale;
-#endif
re = *yp++;
im = *yp++;
yr = -S_MUL(re,t0) + S_MUL(im,t1);
@@ -208,15 +206,13 @@
yi = -S_MUL(im,t0) - S_MUL(re,t1);
yc.r = yr;
yc.i = yi;
-#ifdef FIXED_POINT
- yc.r = SHR32(yc.r, scale_shift);
- yc.i = SHR32(yc.i, scale_shift);
-#endif
+ yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
+ yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
f2[st->bitrev[i]] = yc;
}
}
- /* N/4 complex FFT, down-scales by 4/N */
+ /* N/4 complex FFT, does not downscale anymore */
opus_fft_impl(st, f2);
/* Post-rotate */