shithub: opus

--- a/libcelt/celt.c

+++ b/libcelt/celt.c

@@ -317,11 +317,9 @@

    const int C = CHANNELS(_C);

    if (C==1 && !shortBlocks)

-      const mdct_lookup *lookup = &mode->mdct[LM];

       const int overlap = OVERLAP(mode);

-      clt_mdct_forward(lookup, in, out, mode->window, overlap);

+      clt_mdct_forward(&mode->mdct, in, out, mode->window, overlap, mode->maxLM-LM);

    } else {

-      const mdct_lookup *lookup = &mode->mdct[LM];

       const int overlap = OVERLAP(mode);

       int N = mode->shortMdctSize<<LM;

       int B = 1;

@@ -331,7 +329,7 @@

       SAVE_STACK;

       if (shortBlocks)

-         lookup = &mode->mdct[0];

+         /*lookup = &mode->mdct[0];*/

          N = mode->shortMdctSize;

          B = shortBlocks;

@@ -344,7 +342,7 @@

             int j;

             for (j=0;j<N+overlap;j++)

                x[j] = in[C*(b*N+j)+c];

-            clt_mdct_forward(lookup, x, tmp, mode->window, overlap);

+            clt_mdct_forward(&mode->mdct, x, tmp, mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM);

             /* Interleaving the sub-frames */

             for (j=0;j<N;j++)

                out[(j*B+b)+c*N*B] = tmp[j];

@@ -367,8 +365,7 @@

       int j;

       if (transient_shift==0 && C==1 && !shortBlocks) {

-         const mdct_lookup *lookup = &mode->mdct[LM];

-         clt_mdct_backward(lookup, X, out_mem+C*(MAX_PERIOD-N-N4), mode->window, overlap);

+         clt_mdct_backward(&mode->mdct, X, out_mem+C*(MAX_PERIOD-N-N4), mode->window, overlap, mode->maxLM-LM);

       } else {

          VARDECL(celt_word32, x);

          VARDECL(celt_word32, tmp);

@@ -376,7 +373,6 @@

          int N2 = N;

          int B = 1;

          int n4offset=0;

-         const mdct_lookup *lookup = &mode->mdct[LM];

          SAVE_STACK;

          ALLOC(x, 2*N, celt_word32);

@@ -384,7 +380,7 @@

          if (shortBlocks)

-            lookup = &mode->mdct[0];

+            /*lookup = &mode->mdct[0];*/

             N2 = mode->shortMdctSize;

             B = shortBlocks;

             n4offset = N4;

@@ -397,7 +393,7 @@

             /* De-interleaving the sub-frames */

             for (j=0;j<N2;j++)

                tmp[j] = X[(j*B+b)+c*N2*B];

-            clt_mdct_backward(lookup, tmp, x+n4offset+N2*b, mode->window, overlap);

+            clt_mdct_backward(&mode->mdct, tmp, x+n4offset+N2*b, mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM);

          if (transient_shift > 0)

--- a/libcelt/mdct.c

+++ b/libcelt/mdct.c

@@ -58,7 +58,7 @@

 #define M_PI 3.141592653

 #endif

-void clt_mdct_init(mdct_lookup *l,int N)

+void clt_mdct_init(mdct_lookup *l,int N, int maxshift)

    int i;

    int N2, N4;

@@ -65,11 +65,16 @@

    l->n = N;

    N2 = N>>1;

    N4 = N>>2;

-   l->kfft = cpx32_fft_alloc(N>>2);

+   l->kfft = celt_alloc(sizeof(kiss_fft_cfg)*(maxshift+1));

+   l->maxshift = maxshift;

+   for (i=0;i<=maxshift;i++)

+   {

+      l->kfft[i] = cpx32_fft_alloc(N>>2>>i);

 #ifndef ENABLE_TI_DSPLIB55

-   if (l->kfft==NULL)

-     return;

+      if (l->kfft[i]==NULL)

+         return;

 #endif

+   }

    l->trig = (kiss_twiddle_scalar*)celt_alloc((N4+1)*sizeof(kiss_twiddle_scalar));

    if (l->trig==NULL)

      return;

@@ -90,11 +95,14 @@

 void clt_mdct_clear(mdct_lookup *l)

-   cpx32_fft_free(l->kfft);

+   int i;

+   for (i=0;i<=l->maxshift;i++)

+      cpx32_fft_free(l->kfft[i]);

+   celt_free(l->kfft);

    celt_free(l->trig);

-void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16 *window, int overlap)

+void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16 *window, int overlap, int shift)

    int i;

    int N, N2, N4;

@@ -102,6 +110,7 @@

    VARDECL(kiss_fft_scalar, f);

    SAVE_STACK;

    N = l->n;

+   N >>= shift;

    N2 = N>>1;

    N4 = N>>2;

    ALLOC(f, N2, kiss_fft_scalar);

@@ -161,8 +170,8 @@

          kiss_fft_scalar re, im, yr, yi;

          re = yp[0];

          im = yp[1];

-         yr = -S_MUL(re,t[i])  -  S_MUL(im,t[N4-i]);

-         yi = -S_MUL(im,t[i])  +  S_MUL(re,t[N4-i]);

+         yr = -S_MUL(re,t[i<<shift])  -  S_MUL(im,t[(N4-i)<<shift]);

+         yi = -S_MUL(im,t[i<<shift])  +  S_MUL(re,t[(N4-i)<<shift]);

          /* works because the cos is nearly one */

          *yp++ = yr + S_MUL(yi,sine);

          *yp++ = yi - S_MUL(yr,sine);

@@ -170,7 +179,7 @@

    /* N/4 complex FFT, down-scales by 4/N */

-   cpx32_fft(l->kfft, out, f, N4);

+   cpx32_fft(l->kfft[shift], out, f, N4);

    /* Post-rotate */

@@ -183,8 +192,8 @@

       for(i=0;i<N4;i++)

          kiss_fft_scalar yr, yi;

-         yr = S_MUL(fp[1],t[N4-i]) + S_MUL(fp[0],t[i]);

-         yi = S_MUL(fp[0],t[N4-i]) - S_MUL(fp[1],t[i]);

+         yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);

+         yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);

          /* works because the cos is nearly one */

          *yp1 = yr - S_MUL(yi,sine);

          *yp2 = yi + S_MUL(yr,sine);;

@@ -197,7 +206,7 @@

-void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16 * restrict window, int overlap)

+void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16 * restrict window, int overlap, int shift)

    int i;

    int N, N2, N4;

@@ -206,6 +215,7 @@

    VARDECL(kiss_fft_scalar, f2);

    SAVE_STACK;

    N = l->n;

+   N >>= shift;

    N2 = N>>1;

    N4 = N>>2;

    ALLOC(f, N2, kiss_fft_scalar);

@@ -227,8 +237,8 @@

       for(i=0;i<N4;i++)

          kiss_fft_scalar yr, yi;

-         yr = -S_MUL(*xp2, t[i]) + S_MUL(*xp1,t[N4-i]);

-         yi =  -S_MUL(*xp2, t[N4-i]) - S_MUL(*xp1,t[i]);

+         yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);

+         yi =  -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);

          /* works because the cos is nearly one */

          *yp++ = yr - S_MUL(yi,sine);

          *yp++ = yi + S_MUL(yr,sine);

@@ -238,7 +248,7 @@

    /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */

-   cpx32_ifft(l->kfft, f2, f, N4);

+   cpx32_ifft(l->kfft[shift], f2, f, N4);

    /* Post-rotate */

@@ -251,8 +261,8 @@

          re = fp[0];

          im = fp[1];

          /* We'd scale up by 2 here, but instead it's done when mixing the windows */

-         yr = S_MUL(re,t[i]) - S_MUL(im,t[N4-i]);

-         yi = S_MUL(im,t[i]) + S_MUL(re,t[N4-i]);

+         yr = S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);

+         yi = S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);

          /* works because the cos is nearly one */

          *fp++ = yr - S_MUL(yi,sine);

          *fp++ = yi + S_MUL(yr,sine);

--- a/libcelt/mdct.h

+++ b/libcelt/mdct.h

@@ -51,18 +51,19 @@

 typedef struct {

    int n;

-   kiss_fft_cfg kfft;

+   int maxshift;

+   kiss_fft_cfg *kfft;

    kiss_twiddle_scalar * restrict trig;

 } mdct_lookup;

-void clt_mdct_init(mdct_lookup *l,int N);

+void clt_mdct_init(mdct_lookup *l,int N, int maxshift);

 void clt_mdct_clear(mdct_lookup *l);

 /** Compute a forward MDCT and scale by 4/N */

-void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16 *window, int overlap);

+void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16 *window, int overlap, int shift);

 /** Compute a backward MDCT (no scaling) and performs weighted overlap-add

     (scales implicitly by 1/2) */

-void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16 * restrict window, int overlap);

+void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16 * restrict window, int overlap, int shift);

 #endif

--- a/libcelt/modes.c

+++ b/libcelt/modes.c

@@ -261,6 +261,7 @@

 CELTMode *celt_mode_create(celt_int32 Fs, int frame_size, int *error)

    int i;

+   int LM;

 #ifdef STDIN_TUNING

    scanf("%d ", &MIN_BINS);

    scanf("%d ", &BITALLOC_SIZE);

@@ -337,18 +338,20 @@

    if (frame_size >= 640 && (frame_size%16)==0)

-     mode->nbShortMdcts = 8;

+     LM = 3;

    } else if (frame_size >= 320 && (frame_size%8)==0)

-     mode->nbShortMdcts = 4;

+     LM = 2;

    } else if (frame_size >= 160 && (frame_size%4)==0)

-     mode->nbShortMdcts = 2;

+     LM = 1;

    } else

-     mode->nbShortMdcts = 1;

+     LM = 0;

+   mode->maxLM = LM;

+   mode->nbShortMdcts = 1<<LM;

    mode->shortMdctSize = frame_size/mode->nbShortMdcts;

    res = (mode->Fs+mode->shortMdctSize)/(2*mode->shortMdctSize);

@@ -402,16 +405,14 @@

    mode->logN = logN;

 #endif /* !STATIC_MODES */

-   for (i=0;(1<<i)<=mode->nbShortMdcts;i++)

-   {

-      clt_mdct_init(&mode->mdct[i], 2*mode->shortMdctSize<<i);

-      if ((mode->mdct[i].trig==NULL)

+   clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts, LM);

+   if ((mode->mdct.trig==NULL)

 #ifndef ENABLE_TI_DSPLIB55

-           || (mode->mdct[i].kfft==NULL)

+         || (mode->mdct.kfft==NULL)

 #endif

-      )

-        goto failure;

-   }

+   )

+      goto failure;

    mode->prob = quant_prob_alloc(mode);

    if (mode->prob==NULL)

      goto failure;

@@ -487,8 +488,7 @@

    celt_free((celt_int16*)mode->logN);

 #endif

-   for (i=0;(1<<i)<=mode->nbShortMdcts;i++)

-      clt_mdct_clear(&mode->mdct[i]);

+   clt_mdct_clear(&mode->mdct);

    quant_prob_free(mode->prob);

    mode->marker_end = MODEFREED;

--- a/libcelt/modes.h

+++ b/libcelt/modes.h

@@ -98,10 +98,11 @@

    const celt_int16 * const *(_bits[MAX_CONFIG_SIZES]); /**< Cache for pulses->bits mapping in each band */

    /* Stuff that could go in the {en,de}coder, but we save space this way */

-   mdct_lookup mdct[MAX_CONFIG_SIZES];

+   mdct_lookup mdct;

    const celt_word16 *window;

+   int         maxLM;

    int         nbShortMdcts;

    int         shortMdctSize;

--- a/libcelt/os_support.h

+++ b/libcelt/os_support.h

@@ -45,7 +45,7 @@

 /** CELT wrapper for calloc(). To do your own dynamic allocation, all you need to do is replace this function, celt_realloc and celt_free

     NOTE: celt_alloc needs to CLEAR THE MEMORY */

 #ifndef OVERRIDE_CELT_ALLOC

-static inline void *celt_alloc (int size)

+static void *celt_alloc (int size)

    /* WARNING: this is not equivalent to malloc(). If you want to use malloc()

       or your own allocator, YOU NEED TO CLEAR THE MEMORY ALLOCATED. Otherwise

--- a/tests/mdct-test.c

+++ b/tests/mdct-test.c

@@ -89,7 +89,7 @@

     celt_word16  * window= (celt_word16*)malloc(sizeof(celt_word16)*nfft/2);

     int k;

-    clt_mdct_init(&cfg, nfft);

+    clt_mdct_init(&cfg, nfft, 0);

     for (k=0;k<nfft;++k) {

         in[k] = (rand() % 32768) - 16384;

@@ -116,10 +116,10 @@

        for (k=0;k<nfft;++k)

           out[k] = 0;

-       clt_mdct_backward(&cfg,in,out, window, nfft/2);

+       clt_mdct_backward(&cfg,in,out, window, nfft/2, 0);

        check_inv(in,out,nfft,isinverse);

     } else {

-       clt_mdct_forward(&cfg,in,out,window, nfft/2);

+       clt_mdct_forward(&cfg,in,out,window, nfft/2, 0);

        check(in,out,nfft,isinverse);

     /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/