ref: bdc7b9335875a089ea61023cbe6e772599d9deac
parent: 4a6744a4467c58c5c848f2819cd05edeef3ef5ff
author: Jean-Marc Valin <[email protected]>
date: Mon Jan 6 03:58:38 EST 2014
Reduces decoder stack usage by only storing one channel of denormalized MDCT
--- a/celt/bands.c
+++ b/celt/bands.c
@@ -194,76 +194,73 @@
/* De-normalise the energy to produce the synthesis from the unit-energy bands */
void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start,
- int end, int C, int M, int downsample, int silence)
+ int end, int M, int downsample, int silence)
{
- int i, c, N;
+ int i, N;
int bound;
+ celt_sig * OPUS_RESTRICT f;
+ const celt_norm * OPUS_RESTRICT x;
const opus_int16 *eBands = m->eBands;
N = M*m->shortMdctSize;
bound = M*eBands[end];
if (downsample!=1)
bound = IMIN(bound, N/downsample);
- celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels");
if (silence)
{
bound = 0;
start = end = 0;
}
- c=0; do {
- celt_sig * OPUS_RESTRICT f;
- const celt_norm * OPUS_RESTRICT x;
- f = freq+c*N;
- x = X+c*N+M*eBands[start];
- for (i=0;i<M*eBands[start];i++)
- *f++ = 0;
- for (i=start;i<end;i++)
- {
- int j, band_end;
- opus_val16 g;
- opus_val16 lg;
+ f = freq;
+ x = X+M*eBands[start];
+ for (i=0;i<M*eBands[start];i++)
+ *f++ = 0;
+ for (i=start;i<end;i++)
+ {
+ int j, band_end;
+ opus_val16 g;
+ opus_val16 lg;
#ifdef FIXED_POINT
- int shift;
+ int shift;
#endif
- j=M*eBands[i];
- band_end = M*eBands[i+1];
- lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6));
+ j=M*eBands[i];
+ band_end = M*eBands[i+1];
+ lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6));
#ifndef FIXED_POINT
- g = celt_exp2(lg);
+ g = celt_exp2(lg);
#else
- /* Handle the integer part of the log energy */
- shift = 16-(lg>>DB_SHIFT);
- if (shift>31)
- {
- shift=0;
- g=0;
- } else {
- /* Handle the fractional part. */
- g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
- }
- /* Handle extreme gains with negative shift. */
- if (shift<0)
- {
- /* For shift < -2 we'd be likely to overflow, so we're capping
+ /* Handle the integer part of the log energy */
+ shift = 16-(lg>>DB_SHIFT);
+ if (shift>31)
+ {
+ shift=0;
+ g=0;
+ } else {
+ /* Handle the fractional part. */
+ g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
+ }
+ /* Handle extreme gains with negative shift. */
+ if (shift<0)
+ {
+ /* For shift < -2 we'd be likely to overflow, so we're capping
the gain here. This shouldn't happen unless the bitstream is
already corrupted. */
- if (shift < -2)
- {
- g = 32767;
- shift = -2;
- }
- do {
- *f++ = SHL32(MULT16_16(*x++, g), -shift);
- } while (++j<band_end);
- } else
+ if (shift < -2)
+ {
+ g = 32767;
+ shift = -2;
+ }
+ do {
+ *f++ = SHL32(MULT16_16(*x++, g), -shift);
+ } while (++j<band_end);
+ } else
#endif
/* Be careful of the fixed-point "else" just above when changing this code */
do {
*f++ = SHR32(MULT16_16(*x++, g), shift);
} while (++j<band_end);
- }
- celt_assert(start <= end);
- OPUS_CLEAR(&freq[c*N+bound], N-bound);
- } while (++c<C);
+ }
+ celt_assert(start <= end);
+ OPUS_CLEAR(&freq[bound], N-bound);
}
/* This prevents energy collapse for transients with multiple short MDCTs */
--- a/celt/bands.h
+++ b/celt/bands.h
@@ -60,7 +60,7 @@
*/
void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start,
- int end, int C, int M, int downsample, int silence);
+ int end, int M, int downsample, int silence);
#define SPREAD_NONE (0)
#define SPREAD_LIGHT (1)
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -205,10 +205,10 @@
void init_caps(const CELTMode *m,int *cap,int LM,int C);
#ifdef RESYNTH
-void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch);
-
-void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
- celt_sig * OPUS_RESTRICT out_mem[], int C, int LM);
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem);
+void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
+ opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
+ int LM, int downsample, int silence);
#endif
#ifdef __cplusplus
--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@@ -190,13 +190,17 @@
#ifndef RESYNTH
static
#endif
-void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch)
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef,
+ celt_sig *mem)
{
int c;
int Nd;
int apply_downsampling=0;
opus_val16 coef0;
+ VARDECL(celt_sig, scratch);
+ SAVE_STACK;
+ ALLOC(scratch, N, celt_sig);
coef0 = coef[0];
Nd = N/downsample;
c=0; do {
@@ -250,6 +254,7 @@
y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
}
} while (++c<C);
+ RESTORE_STACK;
}
/** Compute the IMDCT and apply window for all sub-frames and
@@ -258,9 +263,9 @@
static
#endif
void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
- celt_sig * OPUS_RESTRICT out_mem[], int C, int LM)
+ celt_sig * OPUS_RESTRICT out_mem, int LM)
{
- int b, c;
+ int b;
int B;
int N;
int shift;
@@ -276,13 +281,69 @@
N = mode->shortMdctSize<<LM;
shift = mode->maxLM-LM;
}
- c=0; do {
- /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */
- for (b=0;b<B;b++)
- clt_mdct_backward(&mode->mdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B);
- } while (++c<C);
+ /* IMDCT on the interleaved sub-frames; overlap-add is performed by the IMDCT */
+ for (b=0;b<B;b++)
+ clt_mdct_backward(&mode->mdct, &X[b], out_mem+N*b, mode->window, overlap, shift, B);
}
+#ifndef RESYNTH
+static
+#endif
+void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
+ opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
+ int LM, int downsample, int silence)
+{
+ int c, i;
+ int M, N;
+ int nbEBands;
+ int shortBlocks;
+ int overlap;
+ VARDECL(celt_sig, freq);
+ SAVE_STACK;
+
+ overlap = mode->overlap;
+ nbEBands = mode->nbEBands;
+ N = mode->shortMdctSize<<LM;
+ ALLOC(freq, N, celt_sig); /**< Interleaved signal MDCTs */
+ M = 1<<LM;
+ shortBlocks = isTransient ? M : 0;
+
+ if (CC==2&&C==1)
+ {
+ /* Copying a mono stream to two channels */
+ celt_sig *freq2;
+ denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M,
+ downsample, silence);
+ /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */
+ freq2 = out_syn[1]+overlap/2;
+ OPUS_COPY(freq2, freq, N);
+ compute_inv_mdcts(mode, shortBlocks, freq2, out_syn[0], LM);
+ compute_inv_mdcts(mode, shortBlocks, freq, out_syn[1], LM);
+ } else if (CC==1&&C==2)
+ {
+ /* Downmixing a stereo stream to mono */
+ celt_sig *freq2;
+ freq2 = out_syn[0]+overlap/2;
+ denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M,
+ downsample, silence);
+ /* Use the output buffer as temp array before downmixing. */
+ denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M,
+ downsample, silence);
+ for (i=0;i<N;i++)
+ freq[i] = HALF32(ADD32(freq[i],freq2[i]));
+ /* Compute inverse MDCTs */
+ compute_inv_mdcts(mode, shortBlocks, freq, out_syn[0], LM);
+ } else {
+ /* Normal case (mono or stereo) */
+ c=0; do {
+ denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M,
+ downsample, silence);
+ compute_inv_mdcts(mode, shortBlocks, freq, out_syn[c], LM);
+ } while (++c<CC);
+ }
+ RESTORE_STACK;
+}
+
static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
{
int i, curr, tf_select;
@@ -347,7 +408,6 @@
int loss_count;
int noise_based;
const opus_int16 *eBands;
- VARDECL(celt_sig, scratch);
SAVE_STACK;
mode = st->mode;
@@ -369,11 +429,9 @@
start = st->start;
downsample = st->downsample;
noise_based = loss_count >= 5 || start != 0;
- ALLOC(scratch, noise_based?N*C:N, celt_sig);
if (noise_based)
{
/* Noise-based PLC/CNG */
- celt_sig *freq;
VARDECL(celt_norm, X);
opus_uint32 seed;
opus_val16 *plcLogE;
@@ -385,7 +443,6 @@
/* Share the interleaved signal MDCT coefficient buffer with the
deemphasis scratch buffer. */
- freq = scratch;
ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
if (loss_count >= 5)
@@ -421,14 +478,12 @@
}
st->rng = seed;
- denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM,
- downsample, 0);
-
c=0; do {
OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
DECODE_BUFFER_SIZE-N+(overlap>>1));
} while (++c<C);
- compute_inv_mdcts(mode, 0, freq, out_syn, C, LM);
+
+ celt_synthesis(mode, X, out_syn, plcLogE, start, effEnd, C, C, 0, LM, st->downsample, 0);
} else {
/* Pitch-based PLC */
const opus_val16 *window;
@@ -639,7 +694,7 @@
}
deemphasis(out_syn, pcm, N, C, downsample,
- mode->preemph, st->preemph_memD, scratch);
+ mode->preemph, st->preemph_memD);
st->loss_count = loss_count+1;
@@ -909,8 +964,6 @@
anti_collapse(mode, X, collapse_masks, LM, C, N,
start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
- ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */
-
if (silence)
{
for (i=0;i<C*nbEBands;i++)
@@ -917,10 +970,6 @@
oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
}
- /* Synthesis */
- denormalise_bands(mode, X, freq, oldBandE, start, effEnd, C, M,
- st->downsample, silence);
-
c=0; do {
OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
} while (++c<CC);
@@ -929,17 +978,8 @@
out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
} while (++c<CC);
- if (CC==2&&C==1)
- OPUS_COPY(freq+N, freq, N);
- if (CC==1&&C==2)
- {
- for (i=0;i<N;i++)
- freq[i] = HALF32(ADD32(freq[i],freq[N+i]));
- }
+ celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, CC, isTransient, LM, st->downsample, silence);
- /* Compute inverse MDCTs */
- compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM);
-
c=0; do {
st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
@@ -995,7 +1035,7 @@
st->rng = dec->rng;
/* We reuse freq[] as scratch space for the de-emphasis */
- deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq);
+ deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD);
st->loss_count = 0;
RESTORE_STACK;
if (ec_tell(dec) > 8*len)
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -1973,25 +1973,15 @@
start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
}
- /* Synthesis */
- denormalise_bands(mode, X, freq, oldBandE, start, effEnd, C, M,
- st->upsample, silence);
-
c=0; do {
OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2);
} while (++c<CC);
- if (CC==2&&C==1)
- {
- for (i=0;i<N;i++)
- freq[N+i] = freq[i];
- }
-
c=0; do {
out_mem[c] = st->syn_mem[c]+2*MAX_PERIOD-N;
} while (++c<CC);
- compute_inv_mdcts(mode, shortBlocks, freq, out_mem, CC, LM);
+ celt_synthesis(mode, X, out_mem, oldBandE, start, effEnd, C, CC, isTransient, LM, st->upsample, silence);
c=0; do {
st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
@@ -2006,7 +1996,7 @@
} while (++c<CC);
/* We reuse freq[] as scratch space for the de-emphasis */
- deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD, freq);
+ deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD);
st->prefilter_period_old = st->prefilter_period;
st->prefilter_gain_old = st->prefilter_gain;
st->prefilter_tapset_old = st->prefilter_tapset;