ref: 888d8ce9397df6ca1c4a256d8f44eac6249869b3
parent: 381d05aa0b13843d931542cbb781fca1fb42172f
author: Gregory Maxwell <[email protected]>
date: Thu May 21 00:21:53 EDT 2009
VBR support. VBR API and VBR support in celtenc.
--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@@ -78,6 +78,7 @@
int pitch_enabled;
int pitch_available;
int delayedIntra;
+ int VBR_rate; /* Target number of 16th bits per frame */
celt_word16_t * restrict preemph_memE; /* Input is 16-bit, so why bother with 32 */
celt_sig_t * restrict preemph_memD;
@@ -108,6 +109,7 @@
st->block_size = N;
st->overlap = mode->overlap;
+ st->VBR_rate = 0;
st->pitch_enabled = 1;
st->pitch_available = 1;
st->delayedIntra = 1;
@@ -439,6 +441,7 @@
int pitch_index;
int bits;
int has_fold=1;
+ unsigned coarse_needed;
ec_byte_buffer buf;
ec_enc enc;
VARDECL(celt_sig_t, in);
@@ -667,8 +670,29 @@
/* Bit allocation */
ALLOC(error, C*st->mode->nbEBands, celt_word16_t);
- quant_coarse_energy(st->mode, bandE, st->oldBandE, nbCompressedBytes*8/3, intra_ener, st->mode->prob, error, &enc);
+ coarse_needed = quant_coarse_energy(st->mode, bandE, st->oldBandE, nbCompressedBytes*8/3, intra_ener, st->mode->prob, error, &enc);
+ coarse_needed = ((coarse_needed*3-1)>>3)+1;
+
+ /* Variable bitrate */
+ if (st->VBR_rate>0)
+ {
+ /* The target rate in units of 1/16th of a bit per frame */
+ int target=st->VBR_rate;
+ /* Shortblocks get a large boost in bitrate, but since they are uncommon, long blocks are not greatly affected */
+ if (shortBlocks)
+ target*=2;
+ else if (st->mode->nbShortMdcts > 1)
+ target-=(target+14)/28;
+
+ /*The average energy is removed from the target and the actual energy added*/
+ target=target-588+ec_enc_tell(&enc, 4);
+
+ /* In VBR mode the frame size must not be reduced so much that it would result in the coarse energy busting its budget */
+ target=IMAX(coarse_needed,(target+64)/128);
+ nbCompressedBytes=IMIN(nbCompressedBytes,target);
+ }
+
ALLOC(offsets, st->mode->nbEBands, int);
ALLOC(stereo_mode, st->mode->nbEBands, int);
stereo_decision(st->mode, X, stereo_mode, st->mode->nbEBands);
@@ -807,7 +831,7 @@
{
case CELT_SET_COMPLEXITY_REQUEST:
{
- int value = va_arg(ap, int);
+ int value = va_arg(ap, celt_int32_t);
if (value<0 || value>10)
goto bad_arg;
if (value<=2) {
@@ -822,7 +846,7 @@
break;
case CELT_SET_LTP_REQUEST:
{
- int value = va_arg(ap, int);
+ int value = va_arg(ap, celt_int32_t);
if (value<0 || value>1 || (value==1 && st->pitch_available==0))
goto bad_arg;
if (value==0)
@@ -829,6 +853,17 @@
st->pitch_enabled = 0;
else
st->pitch_enabled = 1;
+ }
+ break;
+ case CELT_SET_VBR_RATE_REQUEST:
+ {
+ int value = va_arg(ap, celt_int32_t);
+ if (value<0)
+ goto bad_arg;
+ if (value>3072000)
+ value = 3072000;
+ st->VBR_rate = ((st->mode->Fs<<3)+(st->block_size>>1))/st->block_size;
+ st->VBR_rate = ((value<<7)+(st->VBR_rate>>1))/st->VBR_rate;
}
break;
default:
--- a/libcelt/celt.h
+++ b/libcelt/celt.h
@@ -51,7 +51,7 @@
#define EXPORT
#endif
-#define _celt_check_int(x) (((void)((x) == (int)0)), (int)(x))
+#define _celt_check_int(x) (((void)((x) == (celt_int32_t)0)), (celt_int32_t)(x))
/* Error codes */
/** No error */
@@ -71,9 +71,12 @@
#define CELT_SET_COMPLEXITY_REQUEST 2
/** Controls the complexity from 0-10 (int) */
#define CELT_SET_COMPLEXITY(x) CELT_SET_COMPLEXITY_REQUEST, _celt_check_int(x)
-#define CELT_SET_LTP_REQUEST 3
+#define CELT_SET_LTP_REQUEST 4
/** Activate or deactivate the use of the long term predictor (PITCH) from 0 or 1 (int) */
#define CELT_SET_LTP(x) CELT_SET_LTP_REQUEST, _celt_check_int(x)
+#define CELT_SET_VBR_RATE_REQUEST 6
+/** Set the target VBR rate in bits per second (int); 0=CBR (default) */
+#define CELT_SET_VBR_RATE(x) CELT_SET_VBR_RATE_REQUEST, _celt_check_int(x)
/** GET the frame size used in the current mode */
#define CELT_GET_FRAME_SIZE 1000
--- a/libcelt/quant_bands.c
+++ b/libcelt/quant_bands.c
@@ -122,10 +122,11 @@
celt_free(freq);
}
-static void quant_coarse_energy_mono(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, unsigned budget, int intra, int *prob, celt_word16_t *error, ec_enc *enc)
+static unsigned quant_coarse_energy_mono(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, unsigned budget, int intra, int *prob, celt_word16_t *error, ec_enc *enc)
{
int i;
unsigned bits;
+ unsigned bits_used = 0;
celt_word16_t prev = 0;
celt_word16_t coef = m->ePredCoef;
celt_word16_t beta;
@@ -159,7 +160,8 @@
#endif
/* If we don't have enough bits to encode all the energy, just assume something safe.
We allow slightly busting the budget here */
- if (ec_enc_tell(enc, 0) - bits > budget)
+ bits_used=ec_enc_tell(enc, 0) - bits;
+ if (bits_used > budget)
{
qi = -1;
error[i] = 128;
@@ -174,6 +176,7 @@
oldEBands[i] = -QCONST16(12.f,8);
prev = mean+prev+MULT16_16_Q15(Q15ONE-beta,q);
}
+ return bits_used;
}
static void quant_fine_energy_mono(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, celt_word16_t *error, int *fine_quant, ec_enc *enc)
@@ -279,7 +282,7 @@
-void quant_coarse_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, int intra, int *prob, celt_word16_t *error, ec_enc *enc)
+unsigned quant_coarse_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, int intra, int *prob, celt_word16_t *error, ec_enc *enc)
{
int C;
C = m->nbChannels;
@@ -286,20 +289,24 @@
if (C==1)
{
- quant_coarse_energy_mono(m, eBands, oldEBands, budget, intra, prob, error, enc);
+ return quant_coarse_energy_mono(m, eBands, oldEBands, budget, intra, prob, error, enc);
} else {
int c;
+ unsigned maxBudget=0;
for (c=0;c<C;c++)
{
int i;
+ unsigned coarse_needed;
VARDECL(celt_ener_t, E);
SAVE_STACK;
ALLOC(E, m->nbEBands, celt_ener_t);
for (i=0;i<m->nbEBands;i++)
E[i] = eBands[C*i+c];
- quant_coarse_energy_mono(m, E, oldEBands+c*m->nbEBands, budget/C, intra, prob, error+c*m->nbEBands, enc);
+ coarse_needed=quant_coarse_energy_mono(m, E, oldEBands+c*m->nbEBands, budget/C, intra, prob, error+c*m->nbEBands, enc);
+ maxBudget=IMAX(maxBudget,coarse_needed);
RESTORE_STACK;
}
+ return maxBudget*C;
}
}
--- a/libcelt/quant_bands.h
+++ b/libcelt/quant_bands.h
@@ -44,7 +44,7 @@
int intra_decision(celt_ener_t *eBands, celt_word16_t *oldEBands, int len);
-void quant_coarse_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, int intra, int *prob, celt_word16_t *error, ec_enc *enc);
+unsigned quant_coarse_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, int intra, int *prob, celt_word16_t *error, ec_enc *enc);
void quant_fine_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, celt_word16_t *error, int *fine_quant, ec_enc *enc);
--- a/tools/celtenc.c
+++ b/tools/celtenc.c
@@ -80,12 +80,14 @@
}
#define MAX_FRAME_SIZE 2000
-#define MAX_FRAME_BYTES 2000
+#define MAX_FRAME_BYTES 300
+#define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */
+#define IMAX(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum int value. */
/* Convert input audio bits, endians and channels */
static int read_samples(FILE *fin,int frame_size, int bits, int channels, int lsb, short * input, char *buff, celt_int32_t *size)
{
- unsigned char in[MAX_FRAME_BYTES*2];
+ unsigned char in[MAX_FRAME_SIZE*2];
int i;
short *s;
int nb_read;
@@ -212,6 +214,7 @@
printf ("\n");
printf ("Options:\n");
printf (" --bitrate n Encoding bit-rate in kbit/sec\n");
+ printf (" --vbr Use variable bitrate encoding\n");
printf (" --comp n Encoding complexity (0-10)\n");
printf (" --framesize n Frame size (Default: 256)\n");
printf (" --skeleton Outputs ogg skeleton metadata (may cause incompatibilities)\n");
@@ -248,10 +251,14 @@
CELTMode *mode;
void *st;
unsigned char bits[MAX_FRAME_BYTES];
+ int with_vbr = 0;
int with_skeleton = 0;
+ int total_bytes = 0;
+ int peak_bytes = 0;
struct option long_options[] =
{
{"bitrate", required_argument, NULL, 0},
+ {"vbr",no_argument,NULL, 0},
{"comp", required_argument, NULL, 0},
{"framesize", required_argument, NULL, 0},
{"skeleton",no_argument,NULL, 0},
@@ -312,6 +319,9 @@
if (strcmp(long_options[option_index].name,"bitrate")==0)
{
bitrate = atof (optarg);
+ } else if (strcmp(long_options[option_index].name,"vbr")==0)
+ {
+ with_vbr=1;
} else if (strcmp(long_options[option_index].name,"skeleton")==0)
{
with_skeleton=1;
@@ -444,13 +454,11 @@
}
}
- if (bitrate<0)
+ if (bitrate<=0.005)
if (chan==1)
bitrate=64.0;
else
bitrate=128.0;
- if (chan>2) {
- }
bytes_per_packet = (bitrate*1000*frame_size/rate+4)/8;
@@ -457,13 +465,19 @@
if (bytes_per_packet < 8) {
bytes_per_packet=8;
fprintf (stderr, "Warning: Requested bitrate (%0.3fkbit/sec) is too low. Setting CELT to 8 bytes/frame.\n",bitrate);
- } else if (bytes_per_packet > 300) {
- bytes_per_packet=300;
- fprintf (stderr, "Warning: Requested bitrate (%0.3fkbit/sec) is too high. Setting CELT to 300 bytes/frame.\n",bitrate);
+ } else if (bytes_per_packet > MAX_FRAME_BYTES) {
+ bytes_per_packet=MAX_FRAME_BYTES;
+ fprintf (stderr, "Warning: Requested bitrate (%0.3fkbit/sec) is too high. Setting CELT to %d bytes/frame.\n",bitrate,MAX_FRAME_BYTES);
}
- bitrate = ((rate/(float)frame_size)*8*bytes_per_packet)/1000.0;
-
+ if (with_vbr)
+ {
+ /* In VBR mode the bytes_per_packet argument becomes a hard maximum. 3x the average rate is just an arbitrary choice. */
+ bytes_per_packet=IMIN(bytes_per_packet*3,MAX_FRAME_BYTES);
+ } else {
+ bitrate = ((rate/(float)frame_size)*8*bytes_per_packet)/1000.0;
+ }
+
mode = celt_mode_create(rate, chan, frame_size, NULL);
if (!mode)
return 1;
@@ -483,13 +497,27 @@
if (chan==2)
st_string="stereo";
if (!quiet)
- fprintf (stderr, "Encoding %d Hz %s audio in %d sample packets at %0.3fkbit/sec (%d bytes per packet) with bitstream version %d\n",
+ if (with_vbr)
+ fprintf (stderr, "Encoding %d Hz %s audio in %d sample packets at %0.3fkbit/sec (%d maximum bytes per packet) with bitstream version %d\n",
header.sample_rate, st_string, frame_size, bitrate, bytes_per_packet,bitstream);
+ else
+ fprintf (stderr, "Encoding %d Hz %s audio in %d sample packets at %0.3fkbit/sec (%d bytes per packet) with bitstream version %d\n",
+ header.sample_rate, st_string, frame_size, bitrate, bytes_per_packet,bitstream);
}
/*Initialize CELT encoder*/
st = celt_encoder_create(mode);
+ if (with_vbr)
+ {
+ int tmp = (bitrate*1000);
+ if (celt_encoder_ctl(st, CELT_SET_VBR_RATE(tmp)) != CELT_OK)
+ {
+ fprintf (stderr, "VBR request failed\n");
+ return 1;
+ }
+ }
+
if (complexity!=-127) {
if (celt_encoder_ctl(st, CELT_SET_COMPLEXITY(complexity)) != CELT_OK)
{
@@ -624,6 +652,8 @@
break;
}
nb_encoded += frame_size;
+ total_bytes += nbBytes;
+ peak_bytes=IMAX(nbBytes,peak_bytes);
if (wave_input)
{
@@ -681,6 +711,9 @@
else
bytes_written += ret;
}
+
+ if (with_vbr && !quiet)
+ fprintf (stderr, "Average rate %0.3fkbit/sec, %d peak bytes per packet\n", (total_bytes*8.0/((float)nb_encoded/header.sample_rate))/1000.0, peak_bytes);
celt_encoder_destroy(st);
celt_mode_destroy(mode);