ref: 8eb9bb793976cf62e315416e9f42809bed909244
parent: 22d8dcf398ca7ebba8e35231cdd1e3afc373a9cd
author: Timothy B. Terriberry <[email protected]>
date: Fri Dec 7 09:07:22 EST 2012
Numerous PLC cleanups. This should reduce computation, reduce stack usage, and be substantially easier to read, but should not change behavior. It's not quite bit-exact because I collapsed the application of the decay and the fading into a single pass to save one multiply per sample, but the difference signal is silent. It also changes the comments into complete sentences.
--- a/celt/bands.c
+++ b/celt/bands.c
@@ -212,6 +212,7 @@
x++;
} while (++j<band_end);
}
+ celt_assert(start <= end);
for (i=M*eBands[end];i<N;i++)
*f++ = 0;
} while (++c<C);
--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@@ -301,23 +301,30 @@
}
}
+/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
+ CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
+ current value corresponds to a pitch of 66.67 Hz. */
+#define PLC_PITCH_LAG_MAX (720)
+/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a
+ pitch of 480 Hz. */
+#define PLC_PITCH_LAG_MIN (100)
static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM)
{
int c;
- int pitch_index;
- opus_val16 fade = Q15ONE;
- int i, len;
+ int i;
const int C = st->channels;
- int offset;
- celt_sig *out_mem[2];
celt_sig *decode_mem[2];
+ celt_sig *out_syn[2];
opus_val16 *lpc;
- opus_val32 *out_syn[2];
opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
const OpusCustomMode *mode;
int nbEBands;
int overlap;
+ int start;
+ int downsample;
+ int loss_count;
+ int noise_based;
const opus_int16 *eBands;
VARDECL(celt_sig, scratch);
SAVE_STACK;
@@ -328,54 +335,56 @@
eBands = mode->eBands;
c=0; do {
- decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+st->overlap);
- out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD;
+ decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+ out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
} while (++c<C);
- lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*C);
+ lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C);
oldBandE = lpc+C*LPC_ORDER;
oldLogE = oldBandE + 2*nbEBands;
oldLogE2 = oldLogE + 2*nbEBands;
backgroundLogE = oldLogE2 + 2*nbEBands;
- c=0; do {
- out_syn[c] = out_mem[c]+MAX_PERIOD-N;
- } while (++c<C);
-
- len = N+overlap;
-
- if (st->loss_count >= 5 || st->start!=0)
+ loss_count = st->loss_count;
+ start = st->start;
+ downsample = st->downsample;
+ noise_based = loss_count >= 5 || start != 0;
+ ALLOC(scratch, noise_based?N*C:N, celt_sig);
+ if (noise_based)
{
/* Noise-based PLC/CNG */
- VARDECL(celt_sig, freq);
+ celt_sig *freq;
VARDECL(celt_norm, X);
VARDECL(celt_ener, bandE);
opus_uint32 seed;
+ int end;
int effEnd;
- effEnd = st->end;
- if (effEnd > mode->effEBands)
- effEnd = mode->effEBands;
+ end = st->end;
+ effEnd = IMAX(start, IMIN(end, mode->effEBands));
- ALLOC(freq, C*N, celt_sig); /**< Interleaved signal MDCTs */
+ /* Share the interleaved signal MDCT coefficient buffer with the
+ deemphasis scratch buffer. */
+ freq = scratch;
ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
ALLOC(bandE, nbEBands*C, celt_ener);
- if (st->loss_count >= 5)
- log2Amp(mode, st->start, st->end, bandE, backgroundLogE, C);
+ if (loss_count >= 5)
+ log2Amp(mode, start, end, bandE, backgroundLogE, C);
else {
/* Energy decay */
- opus_val16 decay = st->loss_count==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
+ opus_val16 decay = loss_count==0 ?
+ QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
c=0; do
{
- for (i=st->start;i<st->end;i++)
+ for (i=start;i<end;i++)
oldBandE[c*nbEBands+i] -= decay;
} while (++c<C);
- log2Amp(mode, st->start, st->end, bandE, oldBandE, C);
+ log2Amp(mode, start, end, bandE, oldBandE, C);
}
seed = st->rng;
for (c=0;c<C;c++)
{
- for (i=st->start;i<mode->effEBands;i++)
+ for (i=start;i<effEnd;i++)
{
int j;
int boffs;
@@ -392,34 +401,35 @@
}
st->rng = seed;
- denormalise_bands(mode, X, freq, bandE, st->start, mode->effEBands, C, 1<<LM);
+ denormalise_bands(mode, X, freq, bandE, start, effEnd, C, 1<<LM);
c=0; do {
int bound = eBands[effEnd]<<LM;
- if (st->downsample!=1)
- bound = IMIN(bound, N/st->downsample);
+ if (downsample!=1)
+ bound = IMIN(bound, N/downsample);
for (i=bound;i<N;i++)
freq[c*N+i] = 0;
} while (++c<C);
c=0; do {
- OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap);
+ OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
+ DECODE_BUFFER_SIZE-N+(overlap>>1));
} while (++c<C);
compute_inv_mdcts(mode, 0, freq, out_syn, C, LM);
} else {
/* Pitch-based PLC */
+ const opus_val16 *window;
+ opus_val16 fade = Q15ONE;
+ int pitch_index;
VARDECL(opus_val32, etmp);
- if (st->loss_count == 0)
+ if (loss_count == 0)
{
- opus_val16 pitch_buf[DECODE_BUFFER_SIZE>>1];
- /* Corresponds to a min pitch of 67 Hz. It's possible to save CPU in this
- search by using only part of the decode buffer */
- int poffset = 720;
- pitch_downsample(decode_mem, pitch_buf, DECODE_BUFFER_SIZE, C);
- /* Max pitch is 100 samples (480 Hz) */
- pitch_search(pitch_buf+((poffset)>>1), pitch_buf, DECODE_BUFFER_SIZE-poffset,
- poffset-100, &pitch_index);
- pitch_index = poffset-pitch_index;
+ opus_val16 lp_pitch_buf[DECODE_BUFFER_SIZE>>1];
+ pitch_downsample(decode_mem, lp_pitch_buf, DECODE_BUFFER_SIZE, C);
+ pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
+ DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
+ PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index);
+ pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
st->last_pitch_index = pitch_index;
} else {
pitch_index = st->last_pitch_index;
@@ -427,6 +437,7 @@
}
ALLOC(etmp, overlap, opus_val32);
+ window = mode->window;
c=0; do {
opus_val16 exc[MAX_PERIOD];
opus_val32 ac[LPC_ORDER+1];
@@ -433,28 +444,30 @@
opus_val16 decay;
opus_val16 attenuation;
opus_val32 S1=0;
- opus_val16 mem[LPC_ORDER]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- opus_val32 *e = out_syn[c];
+ opus_val16 lpc_mem[LPC_ORDER];
+ celt_sig *buf;
+ int extrapolation_offset;
+ int extrapolation_len;
+ int exc_length;
+ int j;
+ buf = decode_mem[c];
+ for (i=0;i<MAX_PERIOD;i++) {
+ exc[i] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD+i], SIG_SHIFT);
+ }
- offset = MAX_PERIOD-pitch_index;
- for (i=0;i<MAX_PERIOD;i++)
- exc[i] = ROUND16(out_mem[c][i], SIG_SHIFT);
-
- /* Compute LPC coefficients for the last MAX_PERIOD samples before the loss so we can
- work in the excitation-filter domain */
- if (st->loss_count == 0)
+ if (loss_count == 0)
{
- _celt_autocorr(exc, ac, mode->window, overlap,
- LPC_ORDER, MAX_PERIOD);
-
- /* Noise floor -40 dB */
+ /* Compute LPC coefficients for the last MAX_PERIOD samples before
+ the first loss so we can work in the excitation-filter domain. */
+ _celt_autocorr(exc, ac, window, overlap, LPC_ORDER, MAX_PERIOD);
+ /* Add a noise floor of -40 dB. */
#ifdef FIXED_POINT
ac[0] += SHR32(ac[0],13);
#else
ac[0] *= 1.0001f;
#endif
- /* Lag windowing */
+ /* Use lag windowing to stabilize the Levinson-Durbin recursion. */
for (i=1;i<=LPC_ORDER;i++)
{
/*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
@@ -461,120 +474,147 @@
#ifdef FIXED_POINT
ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
#else
- ac[i] -= ac[i]*(.008f*i)*(.008f*i);
+ ac[i] -= ac[i]*(0.008f*0.008f)*i*i;
#endif
}
-
_celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
}
- /* Samples just before the beginning of exc */
+ /* We want the excitation for 2 pitch periods in order to look for a
+ decaying signal, but we can't get more than MAX_PERIOD. */
+ exc_length = IMIN(2*pitch_index, MAX_PERIOD);
+ /* Initialize the LPC history with the samples just before the start
+ of the region for which we're computing the excitation. */
for (i=0;i<LPC_ORDER;i++)
- mem[i] = ROUND16(out_mem[c][-1-i], SIG_SHIFT);
- /* Compute the excitation for MAX_PERIOD samples before the loss */
- celt_fir(exc, lpc+c*LPC_ORDER, exc, MAX_PERIOD, LPC_ORDER, mem);
+ {
+ lpc_mem[i] =
+ ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
+ }
+ /* Compute the excitation for exc_length samples before the loss. */
+ celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
+ exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);
- /* Check if the waveform is decaying (and if so how fast)
+ /* Check if the waveform is decaying, and if so how fast.
We do this to avoid adding energy when concealing in a segment
- with decaying energy */
+ with decaying energy. */
{
opus_val32 E1=1, E2=1;
- int period;
- if (pitch_index <= MAX_PERIOD/2)
- period = pitch_index;
- else
- period = MAX_PERIOD/2;
- for (i=0;i<period;i++)
+ int decay_length;
+ decay_length = exc_length>>1;
+ for (i=0;i<decay_length;i++)
{
- E1 += SHR32(MULT16_16(exc[MAX_PERIOD-period+i],exc[MAX_PERIOD-period+i]),8);
- E2 += SHR32(MULT16_16(exc[MAX_PERIOD-2*period+i],exc[MAX_PERIOD-2*period+i]),8);
+ opus_val16 e;
+ e = exc[MAX_PERIOD-decay_length+i];
+ E1 += SHR32(MULT16_16(e, e), 8);
+ e = exc[MAX_PERIOD-2*decay_length+i];
+ E2 += SHR32(MULT16_16(e, e), 8);
}
- if (E1 > E2)
- E1 = E2;
- decay = celt_sqrt(frac_div32(SHR32(E1,1),E2));
- attenuation = decay;
+ E1 = MIN32(E1, E2);
+ decay = celt_sqrt(frac_div32(SHR32(E1, 1), E2));
}
- /* Move memory one frame to the left */
- OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
+ /* Move the decoder memory one frame to the left to give us room to
+ add the data for the new frame. We ignore the overlap that extends
+ past the end of the buffer, because we aren't going to use it. */
+ OPUS_MOVE(buf, buf+N, DECODE_BUFFER_SIZE-N);
- /* Extrapolate excitation with the right period, taking decay into account */
- for (i=0;i<len;i++)
+ /* Extrapolate from the end of the excitation with a period of
+ "pitch_index", scaling down each period by an additional factor of
+ "decay". */
+ extrapolation_offset = MAX_PERIOD-pitch_index;
+ /* We need to extrapolate enough samples to cover a complete MDCT
+ window (including overlap/2 samples on both sides). */
+ extrapolation_len = N+overlap;
+ /* We also apply fading if this is not the first loss. */
+ attenuation = MULT16_16_Q15(fade, decay);
+ for (i=j=0;i<extrapolation_len;i++,j++)
{
opus_val16 tmp;
- if (offset+i >= MAX_PERIOD)
- {
- offset -= pitch_index;
+ if (j >= pitch_index) {
+ j -= pitch_index;
attenuation = MULT16_16_Q15(attenuation, decay);
}
- e[i] = SHL32(EXTEND32(MULT16_16_Q15(attenuation, exc[offset+i])), SIG_SHIFT);
+ buf[DECODE_BUFFER_SIZE-N+i] =
+ SHL32(EXTEND32(MULT16_16_Q15(attenuation,
+ exc[extrapolation_offset+j])), SIG_SHIFT);
/* Compute the energy of the previously decoded signal whose
- excitation we're copying */
- tmp = ROUND16(out_mem[c][-N+offset+i],SIG_SHIFT);
- S1 += SHR32(MULT16_16(tmp,tmp),8);
+ excitation we're copying. */
+ tmp = ROUND16(
+ buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j],
+ SIG_SHIFT);
+ S1 += SHR32(MULT16_16(tmp, tmp), 8);
}
/* Copy the last decoded samples (prior to the overlap region) to
synthesis filter memory so we can have a continuous signal. */
for (i=0;i<LPC_ORDER;i++)
- mem[i] = ROUND16(out_mem[c][MAX_PERIOD-N-1-i], SIG_SHIFT);
- /* Apply the fading if not the first loss */
- for (i=0;i<len;i++)
- e[i] = MULT16_32_Q15(fade, e[i]);
- /* Synthesis filter -- back in the signal domain */
- celt_iir(e, lpc+c*LPC_ORDER, e, len, LPC_ORDER, mem);
+ lpc_mem[i] = ROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);
+ /* Apply the synthesis filter to convert the excitation back into the
+ signal domain. */
+ celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
+ buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER, lpc_mem);
/* Check if the synthesis energy is higher than expected, which can
- happen with the signal changes during our window. If so, attenuate. */
+ happen with the signal changes during our window. If so,
+ attenuate. */
{
opus_val32 S2=0;
- for (i=0;i<len;i++)
+ for (i=0;i<extrapolation_len;i++)
{
- opus_val16 tmp = ROUND16(e[i],SIG_SHIFT);
- S2 += SHR32(MULT16_16(tmp,tmp),8);
+ opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT);
+ S2 += SHR32(MULT16_16(tmp, tmp), 8);
}
- /* This checks for an "explosion" in the synthesis */
+ /* This checks for an "explosion" in the synthesis. */
#ifdef FIXED_POINT
if (!(S1 > SHR32(S2,2)))
#else
- /* Float test is written this way to catch NaNs at the same time */
+ /* The float test is written this way to catch NaNs in the output
+ of the IIR filter at the same time. */
if (!(S1 > 0.2f*S2))
#endif
{
- for (i=0;i<len;i++)
- e[i] = 0;
+ for (i=0;i<extrapolation_len;i++)
+ buf[DECODE_BUFFER_SIZE-N+i] = 0;
} else if (S1 < S2)
{
opus_val16 ratio = celt_sqrt(frac_div32(SHR32(S1,1)+1,S2+1));
for (i=0;i<overlap;i++)
{
- opus_val16 tmp_g = Q15ONE - MULT16_16_Q15(mode->window[i], Q15ONE-ratio);
- e[i] = MULT16_32_Q15(tmp_g, e[i]);
+ opus_val16 tmp_g = Q15ONE
+ - MULT16_16_Q15(window[i], Q15ONE-ratio);
+ buf[DECODE_BUFFER_SIZE-N+i] =
+ MULT16_32_Q15(tmp_g, buf[DECODE_BUFFER_SIZE-N+i]);
}
- for (i=overlap;i<len;i++)
- e[i] = MULT16_32_Q15(ratio, e[i]);
+ for (i=overlap;i<extrapolation_len;i++)
+ {
+ buf[DECODE_BUFFER_SIZE-N+i] =
+ MULT16_32_Q15(ratio, buf[DECODE_BUFFER_SIZE-N+i]);
+ }
}
}
- /* Apply pre-filter to the MDCT overlap for the next frame because the
- post-filter will be re-applied in the decoder after the MDCT overlap */
- comb_filter(etmp, out_mem[c]+MAX_PERIOD, st->postfilter_period, st->postfilter_period, st->overlap,
- -st->postfilter_gain, -st->postfilter_gain, st->postfilter_tapset, st->postfilter_tapset,
- NULL, 0);
+ /* Apply the pre-filter to the MDCT overlap for the next frame because
+ the post-filter will be re-applied in the decoder after the MDCT
+ overlap. */
+ comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
+ st->postfilter_period, st->postfilter_period, overlap,
+ -st->postfilter_gain, -st->postfilter_gain,
+ st->postfilter_tapset, st->postfilter_tapset, NULL, 0);
/* Simulate TDAC on the concealed audio so that it blends with the
- MDCT of next frames. */
+ MDCT of the next frame. */
for (i=0;i<overlap/2;i++)
{
- out_mem[c][MAX_PERIOD+i] = MULT16_32_Q15(mode->window[i], etmp[overlap-1-i]) +
- MULT16_32_Q15(mode->window[overlap-i-1], etmp[i]);
+ buf[DECODE_BUFFER_SIZE+i] =
+ MULT16_32_Q15(window[i], etmp[overlap-1-i])
+ + MULT16_32_Q15(window[overlap-i-1], etmp[i]);
}
} while (++c<C);
}
- ALLOC(scratch, N, celt_sig);
- deemphasis(out_syn, pcm, N, C, st->downsample, mode->preemph, st->preemph_memD, scratch);
+ deemphasis(out_syn, pcm, N, C, downsample,
+ mode->preemph, st->preemph_memD, scratch);
- st->loss_count++;
+ st->loss_count = loss_count+1;
RESTORE_STACK;
}