ref: 163b76e8c4a8ad3f33199e4d72526149a1e34911
parent: 8035b6589dcbb10e67080088ea6ac837ed3a346d
author: Jean-Marc Valin <[email protected]>
date: Thu May 27 19:56:53 EDT 2010
Adaptive time-frequency resolution. Encoding the optimal tf-tradeoff for each band and then applying it during quantization.
--- a/libcelt/bands.c
+++ b/libcelt/bands.c
@@ -469,7 +469,7 @@
in two and transmit the energy difference with the two half-bands. It
can be called recursively so bands can end up being split in 8 parts. */
static void quant_band(int encode, const CELTMode *m, int i, celt_norm *X, celt_norm *Y,
- int N, int b, int spread, celt_norm *lowband, int resynth, ec_enc *ec,
+ int N, int b, int spread, int tf_change, celt_norm *lowband, int resynth, ec_enc *ec,
celt_int32 *remaining_bits, int LM, celt_norm *lowband_out, const celt_ener *bandE, int level)
{
int q;
@@ -482,7 +482,6 @@
int spread0=spread;
int time_divide=0;
int recombine=0;
- int tf_change=-1;
if (spread)
N_B /= spread;
@@ -538,7 +537,7 @@
}
/* Increasing the time resolution */
- if (!stereo && spread>1 && level==0)
+ if (!stereo && level==0)
{
while ((N_B&1) == 0 && tf_change<0 && spread <= (1<<LM))
{
@@ -553,14 +552,14 @@
}
spread0 = spread;
N_B0 = N_B;
- if (spread0>1)
- {
- if (encode)
- deinterleave_vector(X, N_B, spread0);
- if (lowband)
- deinterleave_vector(lowband, N_B, spread0);
- }
}
+ if (!stereo && spread0>1 && level==0)
+ {
+ if (encode)
+ deinterleave_vector(X, N_B, spread0);
+ if (lowband)
+ deinterleave_vector(lowband, N_B, spread0);
+ }
/* If we need more than 32 bits, try splitting the band in two. */
if (!stereo && LM != -1 && b > 32<<BITRES && N>2)
@@ -730,7 +729,7 @@
else
sign = -1;
}
- quant_band(encode, m, i, v, NULL, N, mbits, spread, lowband, resynth, ec, remaining_bits, LM, lowband_out, NULL, level+1);
+ quant_band(encode, m, i, v, NULL, N, mbits, spread, tf_change, lowband, resynth, ec, remaining_bits, LM, lowband_out, NULL, level+1);
if (sbits)
{
if (encode)
@@ -782,8 +781,8 @@
else
next_level = level+1;
- quant_band(encode, m, i, X, NULL, N, mbits, spread, lowband, resynth, ec, remaining_bits, LM, next_lowband_out1, NULL, next_level);
- quant_band(encode, m, i, Y, NULL, N, sbits, spread, next_lowband2, resynth, ec, remaining_bits, LM, NULL, NULL, level);
+ quant_band(encode, m, i, X, NULL, N, mbits, spread, tf_change, lowband, resynth, ec, remaining_bits, LM, next_lowband_out1, NULL, next_level);
+ quant_band(encode, m, i, Y, NULL, N, sbits, spread, tf_change, next_lowband2, resynth, ec, remaining_bits, LM, NULL, NULL, level);
}
} else {
@@ -826,13 +825,15 @@
Y[j] = MULT16_16_Q15(Y[j], side);
}
-
if (!stereo && spread0>1 && level==0)
{
- int k;
interleave_vector(X, N_B, spread0);
if (lowband)
interleave_vector(lowband, N_B, spread0);
+ }
+ if (time_divide)
+ {
+ int k;
N_B = N_B0;
spread = spread0;
for (k=0;k<time_divide;k++)
@@ -878,7 +879,7 @@
}
}
-void quant_all_bands(int encode, const CELTMode *m, int start, celt_norm *_X, celt_norm *_Y, const celt_ener *bandE, int *pulses, int shortBlocks, int fold, int resynth, int total_bits, ec_enc *ec, int LM)
+void quant_all_bands(int encode, const CELTMode *m, int start, celt_norm *_X, celt_norm *_Y, const celt_ener *bandE, int *pulses, int shortBlocks, int fold, int *tf_res, int resynth, int total_bits, ec_enc *ec, int LM)
{
int i, remaining_bits, balance;
const celt_int16 * restrict eBands = m->eBands;
@@ -904,6 +905,7 @@
int curr_balance;
celt_norm * restrict X, * restrict Y;
celt_norm *lowband;
+ int tf_change=0;
X = _X+M*eBands[i];
if (_Y!=NULL)
@@ -931,7 +933,14 @@
lowband = norm+M*eBands[i]-N;
else
lowband = NULL;
- quant_band(encode, m, i, X, Y, N, b, spread, lowband, resynth, ec, &remaining_bits, LM, norm+M*eBands[i], bandE, 0);
+
+ if (shortBlocks)
+ {
+ tf_change = tf_res[i] ? -1 : 2;
+ } else {
+ tf_change = tf_res[i] ? -2 : 0;
+ }
+ quant_band(encode, m, i, X, Y, N, b, spread, tf_change, lowband, resynth, ec, &remaining_bits, LM, norm+M*eBands[i], bandE, 0);
balance += pulses[i] + tell;
}
--- a/libcelt/bands.h
+++ b/libcelt/bands.h
@@ -85,7 +85,7 @@
* @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
* @param enc Entropy encoder
*/
-void quant_all_bands(int encode, const CELTMode *m, int start, celt_norm * X, celt_norm * Y, const celt_ener *bandE, int *pulses, int time_domain, int fold, int resynth, int total_bits, ec_enc *enc, int M);
+void quant_all_bands(int encode, const CELTMode *m, int start, celt_norm * X, celt_norm * Y, const celt_ener *bandE, int *pulses, int time_domain, int fold, int *tf_res, int resynth, int total_bits, ec_enc *enc, int M);
void stereo_decision(const CELTMode *m, celt_norm * restrict X, int *stereo_mode, int len, int M);
--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@@ -541,6 +541,65 @@
renormalise_bands(mode, X, C, M);
}
+/* Chooses a per-band time-frequency resolution flag and writes the flags to
+   the bitstream.
+
+   bandLogE    read at [0..len-1], and also at [len..2*len-1] when C==2
+               (presumably the current frame's log band energies -- confirm
+               against the caller)
+   oldBandE    indexed the same way as bandLogE (presumably the previous
+               frame's energies -- confirm against the caller)
+   len         number of bands to process
+   C           channel count; when C==2 the two channels' differences are
+               averaged
+   isTransient selects the lower threshold (thresh2) used for hysteresis
+   tf_res      output, len entries, each set to 0 or 1
+   enc         entropy coder that receives one bit per band
+
+   The bits are written in band order with a probability argument that depends
+   on the previous flag; tf_decode() reads them back the same way. */
+static void tf_encode(celt_word16 *bandLogE, celt_word16 *oldBandE, int len, int C, int isTransient, int *tf_res, ec_enc *enc)
+{
+   int i, curr;
+   celt_word16 thresh1, thresh2;
+   VARDECL(celt_word16, metric);
+   SAVE_STACK;
+
+   ALLOC(metric, len, celt_word16);
+   /* Per-band difference between bandLogE and oldBandE */
+   for (i=0;i<len;i++)
+      metric[i] = bandLogE[i] - oldBandE[i];
+   /* With two channels, combine both channels' differences through HALF32
+      (presumably a halving macro, i.e. an average) */
+   if (C==2)
+      for (i=0;i<len;i++)
+         metric[i] = HALF32(metric[i] + (bandLogE[i+len] - oldBandE[i+len]));
+
+   /* 1-2-1 smoothing across neighbouring bands. It runs in place, so
+      metric[i-1] has already been smoothed when metric[i] is computed. The
+      first and last bands are left as they are. */
+   for (i=1;i<len-1;i++)
+      metric[i] = (2*metric[i]+metric[i-1]+metric[i+1])/4;
+
+   /* thresh1 is the same in both cases; only the hysteresis threshold
+      depends on isTransient */
+   if (isTransient)
+   {
+      thresh1 = QCONST16(1.5f,DB_SHIFT);
+      thresh2 = QCONST16(.5f,DB_SHIFT);
+   } else {
+      thresh1 = QCONST16(1.5f,DB_SHIFT);
+      thresh2 = QCONST16(.8f,DB_SHIFT);
+   }
+   /* Two-threshold decision with hysteresis: above thresh1 the flag is set,
+      at or below thresh2 it is cleared, and in between the previous band's
+      decision is kept. */
+   curr = 0;
+   for (i=0;i<len;i++)
+   {
+      if (metric[i]>thresh1)
+         tf_res[i] = 1;
+      else if (metric[i]>thresh2)
+         tf_res[i] = curr;
+      else
+         tf_res[i] = 0;
+      /* Remember this decision for the next band. Without this update curr
+         stays 0, the thresh2 branch can only ever write 0, and neither
+         thresh2 nor isTransient has any effect. */
+      curr = tf_res[i];
+   }
+   /* Remove isolated decisions: a band that differs from both of its
+      neighbours takes the next band's value */
+   for (i=1;i<len-1;i++)
+      if (tf_res[i] != tf_res[i-1] && tf_res[i] != tf_res[i+1])
+         tf_res[i] = tf_res[i+1];
+   /* Code each flag with a probability argument chosen from the previous
+      flag (240 after a 1, 16 after a 0). The exact meaning of that argument
+      is defined by ec_enc_bit_prob(), which is not visible here. */
+   curr = 0;
+   for (i=0;i<len;i++)
+   {
+      ec_enc_bit_prob(enc, tf_res[i], curr ? 240: 16);
+      curr = tf_res[i];
+   }
+   /* Terminated with ';' like SAVE_STACK above, so this also compiles when
+      the macro expands to an expression */
+   RESTORE_STACK;
+}
+
+/* Reads one time-frequency resolution flag per band from the bitstream.
+
+   len         number of flags to read
+   C           not used by this function
+   isTransient not used by this function
+   tf_res      output, len entries, filled in band order
+   dec         entropy decoder to read from
+
+   The probability argument for each bit depends on the previously decoded
+   flag: 240 after a non-zero flag, 16 otherwise (and 16 for the first band). */
+static void tf_decode(int len, int C, int isTransient, int *tf_res, ec_dec *dec)
+{
+   int band;
+   int prev_flag = 0;
+   for (band=0; band<len; band++)
+   {
+      int prob;
+      if (prev_flag)
+         prob = 240;
+      else
+         prob = 16;
+      /* Decode the flag and keep it as the context for the next band */
+      prev_flag = ec_dec_bit_prob(dec, prob);
+      tf_res[band] = prev_flag;
+   }
+}
+
#ifdef FIXED_POINT
int celt_encode_with_ec(CELTEncoder * restrict st, const celt_int16 * pcm, celt_int16 * optional_resynthesis, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
{
@@ -567,6 +626,7 @@
VARDECL(int, pulses);
VARDECL(int, offsets);
VARDECL(int, fine_priority);
+ VARDECL(int, tf_res);
int intra_ener = 0;
int shortBlocks=0;
int isTransient=0;
@@ -805,6 +865,9 @@
nbCompressedBytes = max_allowed;
}
+ ALLOC(tf_res, st->mode->nbEBands, int);
+ tf_encode(bandLogE, st->oldBandE, st->mode->nbEBands, C, isTransient, tf_res, enc);
+
/* Bit allocation */
ALLOC(error, C*st->mode->nbEBands, celt_word16);
coarse_needed = quant_coarse_energy(st->mode, st->start, bandLogE, st->oldBandE, nbCompressedBytes*4-8, intra_ener, st->mode->prob, error, enc, C);
@@ -879,7 +942,7 @@
quant_fine_energy(st->mode, st->start, bandE, st->oldBandE, error, fine_quant, enc, C);
/* Residual quantisation */
- quant_all_bands(1, st->mode, st->start, X, C==2 ? X+N : NULL, bandE, pulses, shortBlocks, has_fold, resynth, nbCompressedBytes*8, enc, LM);
+ quant_all_bands(1, st->mode, st->start, X, C==2 ? X+N : NULL, bandE, pulses, shortBlocks, has_fold, tf_res, resynth, nbCompressedBytes*8, enc, LM);
quant_energy_finalise(st->mode, st->start, bandE, st->oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_enc_tell(enc, 0), enc, C);
@@ -1487,6 +1550,7 @@
VARDECL(int, pulses);
VARDECL(int, offsets);
VARDECL(int, fine_priority);
+ VARDECL(int, tf_res);
int shortBlocks;
int isTransient;
@@ -1575,6 +1639,9 @@
pitch_index = 0;
}
+ ALLOC(tf_res, st->mode->nbEBands, int);
+ tf_decode(st->mode->nbEBands, C, isTransient, tf_res, dec);
+
ALLOC(fine_quant, st->mode->nbEBands, int);
/* Get band energies */
unquant_coarse_energy(st->mode, st->start, bandE, st->oldBandE, len*4-8, intra_ener, st->mode->prob, dec, C);
@@ -1601,7 +1668,7 @@
}
/* Decode fixed codebook and merge with pitch */
- quant_all_bands(0, st->mode, st->start, X, C==2 ? X+N : NULL, NULL, pulses, shortBlocks, has_fold, 1, len*8, dec, LM);
+ quant_all_bands(0, st->mode, st->start, X, C==2 ? X+N : NULL, NULL, pulses, shortBlocks, has_fold, tf_res, 1, len*8, dec, LM);
unquant_energy_finalise(st->mode, st->start, bandE, st->oldBandE, fine_quant, fine_priority, len*8-ec_dec_tell(dec, 0), dec, C);