shithub: opus

Download patch

ref: cc344fb8ff9649a13d7628c38137f2f65cd65ec4
parent: e0c00e27d8decde15560b35d1ec5139fceb53a81
author: Jean-Marc Valin <[email protected]>
date: Sat Dec 28 14:10:44 EST 2013

Slightly improving the accuracy of the fixed-point MDCT downscale

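Also simplifies the code: the scale factor is now applied once to the computed yc.r/yc.i values before the FFT, through a single code path shared by the fixed-point and floating-point builds, instead of being applied to the t0/t1 factors under separate #ifdef branches.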

--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -119,9 +119,9 @@
    VARDECL(kiss_fft_cpx, f2);
    const kiss_fft_state *st = l->kfft[shift];
    const kiss_twiddle_scalar *trig;
+   opus_val16 scale;
 #ifdef FIXED_POINT
    /* FIXME: This should eventually just go in the state. */
-   opus_val16 scale;
    int scale_shift;
    scale_shift = celt_ilog2(st->nfft);
    if (st->nfft == 1<<scale_shift)
@@ -128,6 +128,11 @@
       scale = Q15ONE;
    else
       scale = (1073741824+st->nfft/2)/st->nfft>>(15-scale_shift);
+   /* Allows us to scale with MULT16_32_Q16(), which is faster than
+      MULT16_32_Q15() on ARM. */
+   scale_shift--;
+#else
+   scale = st->scale;
 #endif
    SAVE_STACK;
 
@@ -195,13 +200,6 @@
          kiss_fft_scalar re, im, yr, yi;
          t0 = t[i];
          t1 = t[N4+i];
-#ifdef FIXED_POINT
-         t0 = MULT16_16_P15(t0, scale);
-         t1 = MULT16_16_P15(t1, scale);
-#else
-         t0 *= st->scale;
-         t1 *= st->scale;
-#endif
          re = *yp++;
          im = *yp++;
          yr = -S_MUL(re,t0)  +  S_MUL(im,t1);
@@ -208,15 +206,13 @@
          yi = -S_MUL(im,t0)  -  S_MUL(re,t1);
          yc.r = yr;
          yc.i = yi;
-#ifdef FIXED_POINT
-         yc.r = SHR32(yc.r, scale_shift);
-         yc.i = SHR32(yc.i, scale_shift);
-#endif
+         yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
+         yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
          f2[st->bitrev[i]] = yc;
       }
    }
 
-   /* N/4 complex FFT, down-scales by 4/N */
+   /* N/4 complex FFT, does not downscale anymore */
    opus_fft_impl(st, f2);
 
    /* Post-rotate */