ref: 12b22484e2b994a21b5e2ee4f36eecf22c448424
parent: b0c153b15d7647771996f175414285fee27340c6
author: Jean-Marc Valin <[email protected]>
date: Mon Jun 16 10:13:05 EDT 2008
Implemented two pre-echo avoidance techniques: time-domain pre-emphasis and per-band IDCT.
--- a/libcelt/bands.c
+++ b/libcelt/bands.c
@@ -42,6 +42,23 @@
#include "os_support.h"
#include "mathops.h"
+static void dctIV(float *X, int len, int dim)
+{
+ int d, n, k;
+ for (d=0;d<dim;d++)
+ {
+ float x[len];
+ for (n=0;n<len;n++)
+ x[n] = X[dim*n+d];
+ for (k=0;k<len;k++)
+ {
+ float sum = 0;
+ for (n=0;n<len;n++)
+ sum += x[n]*cos(M_PI/len*(n+.5)*(k+.5));
+ X[dim*k+d] = sqrt(2.f/len)*sum;
+ }
+ }
+}
#if 0
void exp_rotation(celt_norm_t *X, int len, int dir, int stride, int iter)
{
@@ -377,7 +394,7 @@
/* Quantisation of the residual */
-void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, celt_mask_t *W, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, ec_enc *enc)
+void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, celt_mask_t *W, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, int time_domain, ec_enc *enc)
{
int i, j, bits;
const celt_int16_t * restrict eBands = m->eBands;
@@ -411,14 +428,19 @@
q = pulses[i];
n = SHL16(celt_sqrt(C*(eBands[i+1]-eBands[i])),11);
+ if (time_domain)
+ dctIV(X+C*eBands[i], eBands[i+1]-eBands[i], C);
/* If pitch isn't available, use intra-frame prediction */
if (eBands[i] >= m->pitchEnd || q<=0)
{
q -= 1;
- if (q<0)
- intra_fold(m, X+C*eBands[i], eBands[i+1]-eBands[i], norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1]);
- else
- intra_prediction(m, X+C*eBands[i], W+C*eBands[i], eBands[i+1]-eBands[i], q, norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1], enc);
+ if (!time_domain)
+ {
+ if (q<0)
+ intra_fold(m, X+C*eBands[i], eBands[i+1]-eBands[i], norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1]);
+ else
+ intra_prediction(m, X+C*eBands[i], W+C*eBands[i], eBands[i+1]-eBands[i], q, norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1], enc);
+ }
}
if (q > 0)
@@ -438,6 +460,8 @@
for (j=C*eBands[i];j<C*eBands[i+1];j++)
X[j] = P[j];
}
+ if (time_domain)
+ dctIV(X+C*eBands[i], eBands[i+1]-eBands[i], C);
for (j=C*eBands[i];j<C*eBands[i+1];j++)
norm[j] = MULT16_16_Q15(n,X[j]);
}
@@ -445,7 +469,7 @@
}
/* Decoding of the residual */
-void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, ec_dec *dec)
+void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, int time_domain, ec_dec *dec)
{
int i, j, bits;
const celt_int16_t * restrict eBands = m->eBands;
@@ -478,10 +502,13 @@
if (eBands[i] >= m->pitchEnd || q<=0)
{
q -= 1;
- if (q<0)
- intra_fold(m, X+C*eBands[i], eBands[i+1]-eBands[i], norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1]);
- else
- intra_unquant(m, X+C*eBands[i], eBands[i+1]-eBands[i], q, norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1], dec);
+ if (!time_domain)
+ {
+ if (q<0)
+ intra_fold(m, X+C*eBands[i], eBands[i+1]-eBands[i], norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1]);
+ else
+ intra_unquant(m, X+C*eBands[i], eBands[i+1]-eBands[i], q, norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1], dec);
+ }
}
if (q > 0)
@@ -498,6 +525,8 @@
for (j=C*eBands[i];j<C*eBands[i+1];j++)
X[j] = P[j];
}
+ if (time_domain)
+ dctIV(X+C*eBands[i], eBands[i+1]-eBands[i], C);
for (j=C*eBands[i];j<C*eBands[i+1];j++)
norm[j] = MULT16_16_Q15(n,X[j]);
}
--- a/libcelt/bands.h
+++ b/libcelt/bands.h
@@ -86,7 +86,7 @@
* @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
* @param enc Entropy encoder
*/
-void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, celt_mask_t *W, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, ec_enc *enc);
+void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, celt_mask_t *W, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, int time_domain, ec_enc *enc);
/** Decoding of the residual spectrum
* @param m Mode data
@@ -95,7 +95,7 @@
* @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
* @param dec Entropy decoder
*/
-void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, ec_dec *dec);
+void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, int time_domain, ec_dec *dec);
void stereo_decision(const CELTMode *m, celt_norm_t * restrict X, int *stereo_mode, int len);
--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@@ -52,7 +52,10 @@
static const celt_word16_t preemph = QCONST16(0.8f,15);
-
+static const float gainWindow[16] = {
+ 0.0085135, 0.0337639, 0.0748914, 0.1304955, 0.1986827, 0.2771308, 0.3631685, 0.4538658,
+ 0.5461342, 0.6368315, 0.7228692, 0.8013173, 0.8695045, 0.9251086, 0.9662361, 0.9914865};
+
/** Encoder state
@brief Encoder state
*/
@@ -187,7 +190,7 @@
}
/** Compute the IMDCT and apply window for all sub-frames and all channels in a frame */
-static void compute_inv_mdcts(const CELTMode *mode, const celt_word16_t * restrict window, celt_sig_t *X, celt_sig_t * restrict out_mem)
+static void compute_inv_mdcts(const CELTMode *mode, const celt_word16_t * restrict window, celt_sig_t *X, int transient_time, float transient_gain, celt_sig_t * restrict out_mem)
{
int c, N4;
const int C = CHANNELS(mode);
@@ -198,7 +201,7 @@
for (c=0;c<C;c++)
{
int j;
- if (C==1) {
+ if (transient_time<0 && C==1) {
mdct_backward(lookup, X, out_mem+C*(MAX_PERIOD-N-N4), window, overlap);
} else {
VARDECL(celt_word32_t, x);
@@ -212,6 +215,13 @@
/* Prevents problems from the imdct doing the overlap-add */
CELT_MEMSET(x+N4, 0, overlap);
mdct_backward(lookup, tmp, x, window, overlap);
+ if (transient_time >= 0)
+ {
+ for (j=0;j<16;j++)
+ x[N4+transient_time+j-16] *= 1+gainWindow[j]*(transient_gain-1);
+ for (j=transient_time;j<N+overlap;j++)
+ x[N4+j] *= transient_gain;
+ }
/* The first and last part would need to be set to zero if we actually
wanted to use them. */
for (j=0;j<overlap;j++)
@@ -241,6 +251,9 @@
#ifdef EXP_PSY
VARDECL(celt_word32_t, mask);
#endif
+ int time_domain=0;
+ int transient_time;
+ float transient_gain;
const int C = CHANNELS(st->mode);
SAVE_STACK;
@@ -267,9 +280,76 @@
}
}
CELT_COPY(st->in_mem, in+C*(2*N-2*N4-st->overlap), C*st->overlap);
-
+ if (1) {
+ int len = N+st->overlap;
+ float maxR, maxD;
+ float begin[C*len], end[C*len];
+ begin[0] = in[0]*in[0];
+ for (i=1;i<len*C;i++)
+ begin[i] = begin[i-1]+in[i]*in[i];
+ end[len-1] = in[len-1]*in[len-1];
+ for (i=C*len-2;i>=0;i--)
+ end[i] = end[i+1] + in[i]*in[i];
+ maxD = 0;
+ maxR = 1;
+ transient_time = -1;
+ for (i=8*C;i<C*(len-8);i++)
+ {
+ float diff = sqrt(sqrt(end[i]/(C*len-i)))-sqrt(sqrt(begin[i]/(i)));
+ float ratio = ((1000+end[i])*i)/((1000+begin[i])*(C*len-i));
+ if (diff > maxD)
+ {
+ maxD = diff;
+ maxR = ratio;
+ transient_time = i;
+ }
+ }
+ transient_time /= C;
+ if (transient_time<32)
+ {
+ transient_time = -1;
+ maxR = 0;
+ }
+ if (maxR > 20)
+ {
+ float gain_1;
+ ec_enc_bits(&st->enc, 1, 1);
+ if (maxR < 30)
+ {
+ transient_gain = 1;
+ ec_enc_bits(&st->enc, 0, 2);
+ } else if (maxR < 100)
+ {
+ transient_gain = 2;
+ ec_enc_bits(&st->enc, 1, 2);
+ } else if (maxR < 500)
+ {
+ transient_gain = 4;
+ ec_enc_bits(&st->enc, 2, 2);
+ } else
+ {
+ transient_gain = 8;
+ ec_enc_bits(&st->enc, 3, 2);
+ }
+ ec_enc_uint(&st->enc, transient_time, len);
+ for (c=0;c<C;c++)
+ for (i=0;i<16;i++)
+ in[C*(transient_time+i-16)+c] /= 1+gainWindow[i]*(transient_gain-1);
+ gain_1 = 1./transient_gain;
+ for (c=0;c<C;c++)
+ for (i=transient_time;i<len;i++)
+ in[C*i+c] *= gain_1;
+ time_domain = 1;
+ } else {
+ ec_enc_bits(&st->enc, 0, 1);
+ transient_time = -1;
+ transient_gain = 1;
+ time_domain = 0;
+ }
+ }
/* Pitch analysis: we do it early to save on the peak stack space */
- find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index);
+ if (!time_domain)
+ find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index);
ALLOC(freq, C*N, celt_sig_t); /**< Interleaved signal MDCTs */
@@ -316,7 +396,8 @@
/*for (i=0;i<N*B*C;i++)printf("%f ", X[i]);printf("\n");*/
/* Compute MDCTs of the pitch part */
- compute_mdcts(st->mode, st->mode->window, st->out_mem+pitch_index*C, freq);
+ if (!time_domain)
+ compute_mdcts(st->mode, st->mode->window, st->out_mem+pitch_index*C, freq);
{
/* Normalise the pitch vector as well (discard the energies) */
@@ -328,7 +409,7 @@
}
curr_power = bandE[0]+bandE[1]+bandE[2];
/* Check if we can safely use the pitch (i.e. effective gain isn't too high) */
- if (MULT16_32_Q15(QCONST16(.1f, 15),curr_power) + QCONST32(10.f,ENER_SHIFT) < pitch_power)
+ if (!time_domain && (MULT16_32_Q15(QCONST16(.1f, 15),curr_power) + QCONST32(10.f,ENER_SHIFT) < pitch_power))
{
/* Simulates intensity stereo */
/*for (i=30;i<N*B;i++)
@@ -357,7 +438,7 @@
/*for (i=0;i<B*N;i++) printf("%f ",P[i]);printf("\n");*/
/* Residual quantisation */
- quant_bands(st->mode, X, P, NULL, bandE, stereo_mode, nbCompressedBytes*8, &st->enc);
+ quant_bands(st->mode, X, P, NULL, bandE, stereo_mode, nbCompressedBytes*8, time_domain, &st->enc);
if (C==2)
{
@@ -369,13 +450,13 @@
CELT_MOVE(st->out_mem, st->out_mem+C*N, C*(MAX_PERIOD+st->overlap-N));
- compute_inv_mdcts(st->mode, st->mode->window, freq, st->out_mem);
+ compute_inv_mdcts(st->mode, st->mode->window, freq, transient_time, transient_gain, st->out_mem);
/* De-emphasis and put everything back at the right place in the synthesis history */
#ifndef SHORTCUTS
for (c=0;c<C;c++)
{
int j;
- const celt_sig_t * restrict outp=st->out_mem+C*(MAX_PERIOD-N)+c;
+ celt_sig_t * restrict outp=st->out_mem+C*(MAX_PERIOD-N)+c;
celt_int16_t * restrict pcmp = pcm+c;
for (j=0;j<N;j++)
{
@@ -536,7 +617,7 @@
CELT_MOVE(st->out_mem, st->out_mem+C*N, C*(MAX_PERIOD+st->mode->overlap-N));
/* Compute inverse MDCTs */
- compute_inv_mdcts(st->mode, st->mode->window, freq, st->out_mem);
+ compute_inv_mdcts(st->mode, st->mode->window, freq, -1, 1, st->out_mem);
for (c=0;c<C;c++)
{
@@ -565,6 +646,9 @@
VARDECL(celt_ener_t, bandE);
VARDECL(celt_pgain_t, gains);
VARDECL(int, stereo_mode);
+ int time_domain;
+ int transient_time;
+ float transient_gain;
const int C = CHANNELS(st->mode);
SAVE_STACK;
@@ -595,6 +679,30 @@
ec_byte_readinit(&buf,data,len);
ec_dec_init(&dec,&buf);
+ time_domain = ec_dec_bits(&dec, 1);
+ if (time_domain)
+ {
+ int gainid = ec_dec_bits(&dec, 2);
+ switch(gainid) {
+ case 0:
+ transient_gain = 1;
+ break;
+ case 1:
+ transient_gain = 2;
+ break;
+ case 2:
+ transient_gain = 4;
+ break;
+ case 3:
+ default:
+ transient_gain = 8;
+ break;
+ }
+ transient_time = ec_dec_uint(&dec, N+st->mode->overlap);
+ } else {
+ transient_time = -1;
+ transient_gain = 1;
+ }
/* Get the pitch gains */
has_pitch = unquant_pitch(gains, st->mode->nbPBands, &dec);
@@ -627,7 +735,7 @@
pitch_quant_bands(st->mode, P, gains);
/* Decode fixed codebook and merge with pitch */
- unquant_bands(st->mode, X, P, bandE, stereo_mode, len*8, &dec);
+ unquant_bands(st->mode, X, P, bandE, stereo_mode, len*8, time_domain, &dec);
if (C==2)
{
@@ -639,7 +747,7 @@
CELT_MOVE(st->out_mem, st->out_mem+C*N, C*(MAX_PERIOD+st->overlap-N));
/* Compute inverse MDCTs */
- compute_inv_mdcts(st->mode, st->mode->window, freq, st->out_mem);
+ compute_inv_mdcts(st->mode, st->mode->window, freq, transient_time, transient_gain, st->out_mem);
for (c=0;c<C;c++)
{