ref: 56522addc2d8e54a8b61740fdaa3237183e35514
parent: f43488cdc2e6b776fc228f074051f095b9b6509f
author: Jean-Marc Valin <[email protected]>
date: Fri Jun 5 13:17:25 EDT 2009
IETF doc update, including better source code formatting
--- a/doc/ietf/convert_source.sh
+++ b/doc/ietf/convert_source.sh
@@ -11,9 +11,21 @@
echo '#include "substitutions.h"' > tata.c
echo 'SOURCE_CODE_BEGIN' >> tata.c
-cat ../../libcelt/$i | sed 's/^#/\/\/PREPROCESS_REMOVE#/' >> tata.c
-gcc -C -E -nostdinc tata.c | grep -v '^#' | sed 's/\/\/PREPROCESS_REMOVE//' | perl -ne 'if ($begin) {print $_} if (/SOURCE_CODE_BEGIN/) {$begin=1}' > tata2.c
-indent --no-tabs -l72 --format-all-comments tata2.c -o tata.c
+
+if echo $i | grep '\.h' > /dev/null; then
+ cat ../../libcelt/$i | sed 's/^#/\/\/PREPROCESS_REMOVE#/' >> tata.c
+else
+ cat ../../libcelt/$i | sed 's/^#include/\/\/PREPROCESS_REMOVE#include/' | sed 's/^#define/\/\/PREPROCESS_REMOVE#define/'>> tata.c
+fi
+
+#cat ../../libcelt/$i | sed 's/^#/\/\/PREPROCESS_REMOVE#/' >> tata.c
+#cat ../../libcelt/$i | sed 's/^#include/\/\/PREPROCESS_REMOVE#include/' | sed 's/^#define/\/\/PREPROCESS_REMOVE#define/'>> tata.c
+gcc -DHAVE_CONFIG_H -C -E -nostdinc tata.c | grep -v '^#' | sed 's/\/\/PREPROCESS_REMOVE//' | perl -ne 'if ($begin) {print $_} if (/SOURCE_CODE_BEGIN/) {$begin=1}' > tata2.c
+
+#cat ../../libcelt/$i >> tata.c
+#gcc -C -E -nostdinc tata.c -fdirectives-only | perl -ne 'if ($begin) {print $_} if (/SOURCE_CODE_BEGIN/) {$begin=1}' > tata2.c
+
+indent -sob -i2 -bl -bli0 --no-tabs -l72 --format-all-comments tata2.c -o tata.c
cat tata.c > source/$i
@@ -37,5 +49,4 @@
rm source/dump_modes*
rm source/header*
rm source/fixed*
-rm source/tata.c
--- a/doc/ietf/draft-valin-celt-codec.xml
+++ b/doc/ietf/draft-valin-celt-codec.xml
@@ -99,7 +99,10 @@
<t>
CELT stands for "Constrained Energy Lapped Transform". This is
the fundamental princple of the codec: the quantization process is designed in such a way
-as to preserve the energy in a certain number of bands.
+as to preserve the energy in a certain number of bands. The theoretical aspects of the
+codec are described in greater detail in <xref target="celt-tasl"/> and
+<xref target="celt-eusipco"/>. Although these papers describe slightly older versions of
+the codec (versions 0.3.2 and 0.5.1, respectively), the principles remain the same.
</t>
<t>CELT is a transform codec, based on the Modified Discrete Cosine Transform
@@ -152,10 +155,8 @@
<t>The MDCT implementation has no special characteristic. The
input is a windowed signal (after pre-emphasis) of 2*N samples and the output is N
frequency-domain samples. A "low-overlap" window is used to reduce the algorithmc delay.
-It is composed of a smaller window with symmetric zero padding on both sides. The window
-is the same as the one used in the Vorbis codec and defined as:
-W(n)=[sin(pi/2*sin(pi/2*(n+.5)/L))]^2. The MDCT is computed in mdct_forward()
-(<xref target="mdct.c">mdct.c</xref>), and includes the windowing.
+It is derived from a basic (with full overlap) window that is the same as the one used in the Vorbis codec: W(n)=[sin(pi/2*sin(pi/2*(n+.5)/L))]^2. The low-overlap window is created by zero padding the basic window and inserting ones in the middle, such that the resulting window still satisfies power complementarity. The MDCT is computed in mdct_forward()
+(<xref target="mdct.c">mdct.c</xref>), which includes the windowing operation.
</t>
</section>
@@ -202,12 +203,12 @@
<t>
The Laplace distribution for each band is defined by a 16-bit (Q15) decay parameter.
-Thus, the value 0 has a probability of p[0]=32767*(16384-decay)/(16384+decay). The
+Thus, the value 0 has a probability of p[0]=2*(16384*(16384-decay)/(16384+decay)). The
values +/-i each have a probability p[i] = (p[i-1]*decay)>>14. The value of p[i] is always
-rounded down (to avoid exceeding 32767 as the sum of all probabilities), so it is possible
-for the sum to be less than 32767. There is thus is small range of values that are impossible.
-The signed values corresponding to symbols 0, 1, 2, 3, 4, ... are [0, +1, -1, +2, -2, ...].
-The encoding of the Laplace-distributed values is implemented in ec_laplace_encode() (<xref target="laplace.c">laplace.c</xref>).
+rounded down (to avoid exceeding 32768 as the sum of all probabilities), so it is possible
+for the sum to be less than 32768. In that case additional values with a probability of 1 are encoded. The signed values corresponding to symbols 0, 1, 2, 3, 4, ...
+are [0, +1, -1, +2, -2, ...]. The encoding of the Laplace-distributed values is
+implemented in ec_laplace_encode() (<xref target="laplace.c">laplace.c</xref>).
</t>
</section>
@@ -412,6 +413,27 @@
<references title="Informative References">
+<reference anchor="celt-tasl">
+<front>
+<title>A High-Quality Speech and Audio Codec With Less Than 10 ms delay</title>
+<author initials="JM" surname="Valin" fullname="Jean-Marc Valin"><organization/></author>
+<author initials="T. B." surname="Terriberry" fullname="Timothy Terriberry"><organization/></author>
+<author initials="C." surname="Montgomery" fullname="Christopher Montgomery"><organization/></author>
+<author initials="G." surname="Maxwell" fullname="Gregory Maxwell"><organization/></author>
+</front>
+<seriesInfo name="To appear in IEEE Transactions on Audio, Speech and Language Processing" value="2009" />
+</reference>
+
+<reference anchor="celt-eusipco">
+<front>
+<title>A Full-Bandwidth Audio Codec with Low Complexity and Very Low Delay</title>
+<author initials="JM" surname="Valin" fullname="Jean-Marc Valin"><organization/></author>
+<author initials="T. B." surname="Terriberry" fullname="Timothy Terriberry"><organization/></author>
+<author initials="G." surname="Maxwell" fullname="Gregory Maxwell"><organization/></author>
+</front>
+<seriesInfo name="Accepted for EUSIPCO" value="2009" />
+</reference>
+
<reference anchor="celt-website">
<front>
<title>The CELT ultra-low delay audio codec</title>
@@ -515,7 +537,6 @@
<?rfc include="xml_source/kiss_fftr.h"?>
<?rfc include="xml_source/kiss_fftr.c"?>
<?rfc include="xml_source/kfft_single.h"?>
-<?rfc include="xml_source/kfft_single.c"?>
<?rfc include="xml_source/kfft_double.h"?>
</section>
--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@@ -60,8 +60,10 @@
17896, 20868, 23687, 26258, 28492, 30314, 31662, 32489};
#else
static const float transientWindow[16] = {
- 0.0085135, 0.0337639, 0.0748914, 0.1304955, 0.1986827, 0.2771308, 0.3631685, 0.4538658,
- 0.5461342, 0.6368315, 0.7228692, 0.8013173, 0.8695045, 0.9251086, 0.9662361, 0.9914865};
+ 0.0085135, 0.0337639, 0.0748914, 0.1304955,
+ 0.1986827, 0.2771308, 0.3631685, 0.4538658,
+ 0.5461342, 0.6368315, 0.7228692, 0.8013173,
+ 0.8695045, 0.9251086, 0.9662361, 0.9914865};
#endif
#define ENCODERVALID 0x4c434554
@@ -86,7 +88,7 @@
int fold_decision;
int VBR_rate; /* Target number of 16th bits per frame */
- celt_word16_t * restrict preemph_memE; /* Input is 16-bit, so why bother with 32 */
+ celt_word16_t * restrict preemph_memE;
celt_sig_t * restrict preemph_memD;
celt_sig_t *in_mem;
@@ -278,7 +280,8 @@
return ratio > 20;
}
-/** Apply window and compute the MDCT for all sub-frames and all channels in a frame */
+/** Apply window and compute the MDCT for all sub-frames and
+ all channels in a frame */
static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig_t * restrict in, celt_sig_t * restrict out)
{
const int C = CHANNELS(mode);
@@ -336,7 +339,8 @@
}
}
-/** Compute the IMDCT and apply window for all sub-frames and all channels in a frame */
+/** Compute the IMDCT and apply window for all sub-frames and
+ all channels in a frame */
static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig_t *X, int transient_time, int transient_shift, celt_sig_t * restrict out_mem)
{
int c, N4;
@@ -406,8 +410,8 @@
x[N4+j] *= 1<<transient_shift;
#endif
}
- /* The first and last part would need to be set to zero if we actually
- wanted to use them. */
+ /* The first and last part would need to be set to zero
+ if we actually wanted to use them. */
for (j=0;j<overlap;j++)
out_mem[C*(MAX_PERIOD-N)+C*j+c] += x[j+N4];
for (j=0;j<overlap;j++)
@@ -530,7 +534,8 @@
if (nbCompressedBytes<0)
return CELT_BAD_ARG;
- /* The memset is important for now in case the encoder doesn't fill up all the bytes */
+ /* The memset is important for now in case the encoder doesn't
+ fill up all the bytes */
CELT_MEMSET(compressed, 0, nbCompressedBytes);
ec_byte_writeinit_buffer(&buf, compressed, nbCompressedBytes);
ec_enc_init(&enc,&buf);
@@ -675,7 +680,8 @@
else
st->delayedIntra = 0;
/* Pitch analysis: we do it early to save on the peak stack space */
- /* Don't use pitch if there isn't enough data available yet, or if we're using shortBlocks */
+ /* Don't use pitch if there isn't enough data available yet,
+ or if we're using shortBlocks */
has_pitch = st->pitch_enabled && (st->pitch_available >= MAX_PERIOD) && (!shortBlocks) && !intra_ener;
#ifdef EXP_PSY
ALLOC(tonality, MAX_PERIOD/4, celt_word16_t);
@@ -700,7 +706,8 @@
printf ("\n");*/
#endif
- /* Deferred allocation after find_spectral_pitch() to reduce the peak memory usage */
+ /* Deferred allocation after find_spectral_pitch() to reduce
+ the peak memory usage */
ALLOC(X, C*N, celt_norm_t); /**< Interleaved normalised MDCTs */
ALLOC(P, C*N, celt_norm_t); /**< Interleaved normalised pitch MDCTs*/
ALLOC(gains,st->mode->nbPBands, celt_pgain_t);
@@ -747,7 +754,8 @@
compute_band_energies(st->mode, freq, bandEp);
normalise_bands(st->mode, freq, P, bandEp);
pitch_power = bandEp[0]+bandEp[1]+bandEp[2];
- /* Check if we can safely use the pitch (i.e. effective gain isn't too high) */
+ /* Check if we can safely use the pitch (i.e. effective gain
+ isn't too high) */
curr_power = bandE[0]+bandE[1]+bandE[2];
if ((MULT16_32_Q15(QCONST16(.1f, 15),curr_power) + QCONST32(10.f,ENER_SHIFT) < pitch_power))
{
@@ -809,13 +817,15 @@
/* The target rate in 16th bits per frame */
int target=st->VBR_rate;
- /* Shortblocks get a large boost in bitrate, but since they are uncommon long blocks are not greatly effected */
+ /* Shortblocks get a large boost in bitrate, but since they
+ are uncommon long blocks are not greatly affected */
if (shortBlocks)
target*=2;
else if (st->mode->nbShortMdcts > 1)
target-=(target+14)/28;
- /*The average energy is removed from the target and the actual energy added*/
+ /* The average energy is removed from the target and the actual
+ energy added*/
target=target-588+ec_enc_tell(&enc, 4);
/* In VBR mode the frame size must not be reduced so much that it would result in the coarse energy busting its budget */
@@ -868,7 +878,8 @@
#endif
}
compute_inv_mdcts(st->mode, shortBlocks, freq, transient_time, transient_shift, st->out_mem);
- /* De-emphasis and put everything back at the right place in the synthesis history */
+ /* De-emphasis and put everything back at the right place
+ in the synthesis history */
if (optional_synthesis != NULL) {
for (c=0;c<C;c++)
{
@@ -884,11 +895,8 @@
}
}
- /*fprintf (stderr, "remaining bits after encode = %d\n", nbCompressedBytes*8-ec_enc_tell(&enc, 0));*/
- /*if (ec_enc_tell(&enc, 0) < nbCompressedBytes*8 - 7)
- celt_warning_int ("many unused bits: ", nbCompressedBytes*8-ec_enc_tell(&enc, 0));*/
-
- /* Finishing the stream with a 0101... pattern so that the decoder can check is everything's right */
+ /* Finishing the stream with a 0101... pattern so that the
+ decoder can check if everything's right */
{
int val = 0;
while (ec_enc_tell(&enc, 0) < nbCompressedBytes*8)
@@ -1068,11 +1076,11 @@
return CELT_UNIMPLEMENTED;
}
-/****************************************************************************/
-/* */
-/* DECODER */
-/* */
-/****************************************************************************/
+/**********************************************************************/
+/* */
+/* DECODER */
+/* */
+/**********************************************************************/
#ifdef NEW_PLC
#define DECODE_BUFFER_SIZE 2048
#else
@@ -1197,7 +1205,8 @@
celt_free(st);
}
-/** Handles lost packets by just copying past data with the same offset as the last
+/** Handles lost packets by just copying past data with the same
+ offset as the last
pitch period */
#ifdef NEW_PLC
#include "plc.c"
@@ -1212,7 +1221,7 @@
int offset;
SAVE_STACK;
N = st->block_size;
- ALLOC(freq,C*N, celt_sig_t); /**< Interleaved signal MDCTs */
+ ALLOC(freq,C*N, celt_sig_t); /**< Interleaved signal MDCTs */
len = N+st->mode->overlap;
#if 0
@@ -1294,8 +1303,8 @@
N4 = (N-st->overlap)>>1;
ALLOC(freq, C*N, celt_sig_t); /**< Interleaved signal MDCTs */
- ALLOC(X, C*N, celt_norm_t); /**< Interleaved normalised MDCTs */
- ALLOC(P, C*N, celt_norm_t); /**< Interleaved normalised pitch MDCTs*/
+ ALLOC(X, C*N, celt_norm_t); /**< Interleaved normalised MDCTs */
+ ALLOC(P, C*N, celt_norm_t); /**< Interleaved normalised pitch MDCTs*/
ALLOC(bandE, st->mode->nbEBands*C, celt_ener_t);
ALLOC(gains, st->mode->nbPBands, celt_pgain_t);
--- a/libcelt/celt.h
+++ b/libcelt/celt.h
@@ -2,7 +2,7 @@
(C) 2008 Gregory Maxwell */
/**
@file celt.h
- @brief Contains all the functions for encoding and decoding audio streams
+ @brief Contains all the functions for encoding and decoding audio
*/
/*
@@ -67,7 +67,7 @@
#define CELT_CORRUPTED_DATA -4
/** Invalid/unsupported request number */
#define CELT_UNIMPLEMENTED -5
-/** An encoder or decoder structure passed is invalid or already freed */
+/** An encoder or decoder structure is invalid or already freed */
#define CELT_INVALID_STATE -6
/* Requests */
@@ -78,10 +78,11 @@
/** Controls the complexity from 0-10 (int) */
#define CELT_SET_COMPLEXITY(x) CELT_SET_COMPLEXITY_REQUEST, _celt_check_int(x)
#define CELT_SET_LTP_REQUEST 4
-/** Activate or deactivate the use of the long term predictor (PITCH) from 0 or 1 (int) */
+/** Activate or deactivate the use of the long term predictor (pitch)
+ from 0 or 1 (int) */
#define CELT_SET_LTP(x) CELT_SET_LTP_REQUEST, _celt_check_int(x)
#define CELT_SET_VBR_RATE_REQUEST 6
-/** Set the target VBR rate in bits per second (int); 0=CBR (default) */
+/** Set the target VBR rate in bits per second(int); 0=CBR (default) */
#define CELT_SET_VBR_RATE(x) CELT_SET_VBR_RATE_REQUEST, _celt_check_int(x)
/** Reset the encoder/decoder memories to zero*/
#define CELT_RESET_STATE_REQUEST 8
@@ -100,21 +101,22 @@
#define CELT_GET_BITSTREAM_VERSION 2000
-/** Contains the state of an encoder. One encoder state is needed for each
- stream. It is initialised once at the beginning of the stream. Do *not*
- re-initialise the state for every frame.
+/** Contains the state of an encoder. One encoder state is needed
+ for each stream. It is initialised once at the beginning of the
+ stream. Do *not* re-initialise the state for every frame.
@brief Encoder state
*/
typedef struct CELTEncoder CELTEncoder;
-/** State of the decoder. One decoder state is needed for each stream. It is
- initialised once at the beginning of the stream. Do *not* re-initialise
- the state for every frame */
+/** State of the decoder. One decoder state is needed for each stream.
+ It is initialised once at the beginning of the stream. Do *not*
+ re-initialise the state for every frame */
typedef struct CELTDecoder CELTDecoder;
-/** The mode contains all the information necessary to create an encoder. Both
- the encoder and decoder need to be initialised with exactly the same mode,
- otherwise the quality will be very bad */
+/** The mode contains all the information necessary to create an
+ encoder. Both the encoder and decoder need to be initialised
+ with exactly the same mode, otherwise the quality will be very
+ bad */
typedef struct CELTMode CELTMode;
@@ -123,19 +125,20 @@
/* Mode calls */
-/** Creates a new mode struct. This will be passed to an encoder or decoder.
- The mode MUST NOT BE DESTROYED until the encoders and decoders that use it
- are destroyed as well.
+/** Creates a new mode struct. This will be passed to an encoder or
+ decoder. The mode MUST NOT BE DESTROYED until the encoders and
+ decoders that use it are destroyed as well.
@param Fs Sampling rate (32000 to 96000 Hz)
@param channels Number of channels
- @param frame_size Number of samples (per channel) to encode in each packet (even values; 64 - 512)
+ @param frame_size Number of samples (per channel) to encode in each
+ packet (even values; 64 - 512)
@param error Returned error code (if NULL, no error will be returned)
@return A newly created mode
*/
EXPORT CELTMode *celt_mode_create(celt_int32_t Fs, int channels, int frame_size, int *error);
-/** Destroys a mode struct. Only call this after all encoders and decoders
- using this mode are destroyed as well.
+/** Destroys a mode struct. Only call this after all encoders and
+ decoders using this mode are destroyed as well.
@param mode Mode to be destroyed
*/
EXPORT void celt_mode_destroy(CELTMode *mode);
@@ -146,10 +149,11 @@
/* Encoder stuff */
-/** Creates a new encoder state. Each stream needs its own encoder state (can't
- be shared across simultaneous streams).
- @param mode Contains all the information about the characteristics of the stream
- (must be the same characteristics as used for the decoder)
+/** Creates a new encoder state. Each stream needs its own encoder
+ state (can't be shared across simultaneous streams).
+ @param mode Contains all the information about the characteristics of
+ * the stream (must be the same characteristics as used for the
+ * decoder)
@return Newly created encoder state.
*/
EXPORT CELTEncoder *celt_encoder_create(const CELTMode *mode);
@@ -162,18 +166,19 @@
/** Encodes a frame of audio.
@param st Encoder state
@param pcm PCM audio in float format, with a normal range of ±1.0.
- * Samples with a range beyond ±1.0 are supported but will be clipped by
- * decoders using the integer API and should only be used if it is known that
- * the far end supports extended dynmaic range. There must be exactly
+ * Samples with a range beyond ±1.0 are supported but will
+ * be clipped by decoders using the integer API and should
+ * only be used if it is known that the far end supports
+ * extended dynamic range. There must be exactly
* frame_size samples per channel.
@param optional_synthesis If not NULL, the encoder copies the audio signal that
- * the decoder would decode. It is the same as calling the
- * decoder on the compressed data, just faster.
- * This may alias pcm.
+ * the decoder would decode. It is the same as calling the
+ * decoder on the compressed data, just faster.
+ * This may alias pcm.
@param compressed The compressed data is written here. This may not alias pcm or
- * optional_synthesis.
+ * optional_synthesis.
@param nbCompressedBytes Maximum number of bytes to use for compressing the frame
- * (can change from one frame to another)
+ * (can change from one frame to another)
@return Number of bytes written to "compressed". Will be the same as
* "nbCompressedBytes" unless the stream is VBR and will never be larger.
* If negative, an error has occurred (see error codes). It is IMPORTANT that
--- a/libcelt/testcelt.c
+++ b/libcelt/testcelt.c
@@ -64,7 +64,9 @@
celt_int16_t *in, *out;
if (argc != 9 && argc != 8 && argc != 7)
{
- fprintf (stderr, "Usage: testcelt <rate> <channels> <frame size> <bytes per packet> [<complexity> [packet loss rate]] <input> <output>\n");
+ fprintf (stderr, "Usage: testcelt <rate> <channels> <frame size> "
+ " <bytes per packet> [<complexity> [packet loss rate]] "
+ "<input> <output>\n");
return 1;
}
@@ -83,7 +85,8 @@
bytes_per_packet = atoi(argv[4]);
if (bytes_per_packet < 0 || bytes_per_packet > MAX_PACKET)
{
- fprintf (stderr, "bytes per packet must be between 0 and %d\n",MAX_PACKET);
+ fprintf (stderr, "bytes per packet must be between 0 and %d\n",
+ MAX_PACKET);
return 1;
}
@@ -102,7 +105,6 @@
return 1;
}
- /* Use mode4 for stereo and don't forget to change the value of CHANNEL above */
enc = celt_encoder_create(mode);
dec = celt_decoder_create(mode);