shithub: opus

--- a/celt/mdct.c

+++ b/celt/mdct.c

@@ -119,9 +119,9 @@

    VARDECL(kiss_fft_cpx, f2);

    const kiss_fft_state *st = l->kfft[shift];

    const kiss_twiddle_scalar *trig;

+   opus_val16 scale;

 #ifdef FIXED_POINT

    /* FIXME: This should eventually just go in the state. */

-   opus_val16 scale;

    int scale_shift;

    scale_shift = celt_ilog2(st->nfft);

    if (st->nfft == 1<<scale_shift)

@@ -128,6 +128,11 @@

       scale = Q15ONE;

    else

       scale = (1073741824+st->nfft/2)/st->nfft>>(15-scale_shift);

+   /* Allows us to scale with MULT16_32_Q16(), which is faster than

+      MULT16_32_Q15() on ARM. */

+   scale_shift--;

+#else

+   scale = st->scale;

 #endif

    SAVE_STACK;

@@ -195,13 +200,6 @@

          kiss_fft_scalar re, im, yr, yi;

          t0 = t[i];

          t1 = t[N4+i];

-#ifdef FIXED_POINT

-         t0 = MULT16_16_P15(t0, scale);

-         t1 = MULT16_16_P15(t1, scale);

-#else

-         t0 *= st->scale;

-         t1 *= st->scale;

-#endif

          re = *yp++;

          im = *yp++;

          yr = -S_MUL(re,t0)  +  S_MUL(im,t1);

@@ -208,15 +206,13 @@

          yi = -S_MUL(im,t0)  -  S_MUL(re,t1);

          yc.r = yr;

          yc.i = yi;

-#ifdef FIXED_POINT

-         yc.r = SHR32(yc.r, scale_shift);

-         yc.i = SHR32(yc.i, scale_shift);

-#endif

+         yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);

+         yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);

          f2[st->bitrev[i]] = yc;

-   /* N/4 complex FFT, down-scales by 4/N */

+   /* N/4 complex FFT, does not downscale anymore */

    opus_fft_impl(st, f2);

    /* Post-rotate */