ref: 30e1b4d36c432b5211e7be7d0066e91a846ce04d
parent: ba11d78372ef22f240aea5235af3036760abe1fa
author: Jean-Marc Valin <[email protected]>
date: Mon Apr 21 20:10:52 EDT 2008
Moved the windowing operation from compute_mdcts() to mdct_forward() in an attempt to reduce copying.
--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@@ -154,44 +154,34 @@
static void compute_mdcts(const CELTMode *mode, const celt_word16_t * restrict window, celt_sig_t * restrict in, celt_sig_t * restrict out)
{
int c, N4;
- VARDECL(celt_word32_t, x);
- VARDECL(celt_word32_t, tmp);
- const int C = CHANNELS(mode);
const mdct_lookup *lookup = MDCT(mode);
const int N = FRAMESIZE(mode);
+ const int C = CHANNELS(mode);
const int overlap = OVERLAP(mode);
- SAVE_STACK;
N4 = (N-overlap)>>1;
- ALLOC(x, 2*N, celt_word32_t);
- ALLOC(tmp, N, celt_word32_t);
- for (c=0;c<C;c++)
+ if (C==1)
{
- int j;
- celt_word32_t * restrict x1, * restrict x2;
- for (j=0;j<2*N-2*N4;j++)
- x[j+N4] = in[C*j+c];
- x1 = x+N4;
- x2 = x+2*N-N4-1;
- for (j=0;j<overlap;j++)
+ mdct_forward(lookup, in-N4, out, window, overlap);
+ } else {
+ VARDECL(celt_word32_t, x);
+ VARDECL(celt_word32_t, tmp);
+ SAVE_STACK;
+ ALLOC(x, 2*N, celt_word32_t);
+ ALLOC(tmp, N, celt_word32_t);
+ for (c=0;c<C;c++)
{
- *x1 = MULT16_32_Q15(window[j],*x1);
- *x2 = MULT16_32_Q15(window[j],*x2);
- x1++;
- x2--;
- }
- CELT_MEMSET(x, 0, N4);
- CELT_MEMSET(x+2*N-N4, 0, N4);
- if (C==1)
- {
- mdct_forward(lookup, x, out);
- } else {
- mdct_forward(lookup, x, tmp);
+ int j;
+ for (j=0;j<2*N-2*N4;j++)
+ x[j+N4] = in[C*j+c];
+ CELT_MEMSET(x, 0, N4);
+ CELT_MEMSET(x+2*N-N4, 0, N4);
+ mdct_forward(lookup, x, tmp, window, overlap);
/* Interleaving the sub-frames */
for (j=0;j<N;j++)
out[C*j+c] = tmp[j];
}
+ RESTORE_STACK;
}
- RESTORE_STACK;
}
/** Compute the IMDCT and apply window for all sub-frames and all channels in a frame */
--- a/libcelt/mdct.c
+++ b/libcelt/mdct.c
@@ -86,7 +86,7 @@
celt_free(l->trig);
}
-void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out)
+void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16_t *window, int overlap)
{
int i;
int N, N2, N4;
@@ -105,26 +105,46 @@
const kiss_fft_scalar * restrict xp2 = in+N2+N4-1;
kiss_fft_scalar * restrict yp = out;
kiss_fft_scalar *t = &l->trig[0];
- for(i=0;i<N/8;i++)
+ const celt_word16_t * restrict wp1 = window+overlap/2;
+ const celt_word16_t * restrict wp2 = window+overlap/2-1;
+ for(i=0;i<overlap/4;i++)
{
kiss_fft_scalar re, im;
/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
- re = -HALF32(xp1[N2] + *xp2);
- im = -HALF32(*xp1 - xp2[-N2]);
+ re = -HALF32(MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2));
+ im = -HALF32(MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]));
xp1+=2;
xp2-=2;
+ wp1+=2;
+ wp2-=2;
/* We could remove the HALF32 above and just use MULT16_32_Q16 below
+ (MIXED_PRECISION only) */
+ *yp++ = S_MUL(re,t[0]) - S_MUL(im,t[N4]);
+ *yp++ = S_MUL(im,t[0]) + S_MUL(re,t[N4]);
+ t++;
+ }
+ for(;i<N/8;i++)
+ {
+ kiss_fft_scalar re, im;
+ /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+ re = -HALF32(*xp2);
+ im = -HALF32(*xp1);
+ xp1+=2;
+ xp2-=2;
+ /* We could remove the HALF32 above and just use MULT16_32_Q16 below
(MIXED_PRECISION only) */
*yp++ = S_MUL(re,t[0]) - S_MUL(im,t[N4]);
*yp++ = S_MUL(im,t[0]) + S_MUL(re,t[N4]);
t++;
}
- for(;i<N4;i++)
+ wp1 = window;
+ wp2 = window+overlap-1;
+ for(;i<N4-overlap/4;i++)
{
kiss_fft_scalar re, im;
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
- re = HALF32(xp1[-N2] - *xp2);
- im = -HALF32(*xp1 + xp2[N2]);
+ re = HALF32(-*xp2);
+ im = -HALF32(*xp1);
xp1+=2;
xp2-=2;
/* We could remove the HALF32 above and just use MULT16_32_Q16 below
@@ -132,6 +152,22 @@
*yp++ = S_MUL(re,t[0]) - S_MUL(im,t[N4]);
*yp++ = S_MUL(im,t[0]) + S_MUL(re,t[N4]);
t++;
+ }
+ for(;i<N4;i++)
+ {
+ kiss_fft_scalar re, im;
+ /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+ re = HALF32(MULT16_32_Q15(*wp1, xp1[-N2]) - MULT16_32_Q15(*wp2, *xp2));
+ im = -HALF32(MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]));
+ xp1+=2;
+ xp2-=2;
+ wp1+=2;
+ wp2-=2;
+ /* We could remove the HALF32 above and just use MULT16_32_Q16 below
+ (MIXED_PRECISION only) */
+ *yp++ = S_MUL(re,t[0]) - S_MUL(im,t[N4]);
+ *yp++ = S_MUL(im,t[0]) + S_MUL(re,t[N4]);
+ t++;
}
}
--- a/libcelt/mdct.h
+++ b/libcelt/mdct.h
@@ -57,7 +57,7 @@
void mdct_clear(mdct_lookup *l);
/** Compute a forward MDCT and scale by 2/N */
-void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out);
+void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16_t *window, int overlap);
/** Compute a backward MDCT (no scaling) */
void mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out);
--- a/tests/mdct-test.c
+++ b/tests/mdct-test.c
@@ -102,7 +102,7 @@
mdct_backward(&cfg,in,out);
check_inv(in,out,nfft,isinverse);
} else {
- mdct_forward(&cfg,in,out);
+ mdct_forward(&cfg,in,out,NULL, 0);
check(in,out,nfft,isinverse);
}
/*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/