ref: 8600f69f796f48c359439577d092293e167be58e
parent: 8b2a59235f17cdcd88780f851ba90398c1208315
author: Jean-Marc Valin <[email protected]>
date: Fri Feb 29 10:14:12 EST 2008
Initial support for a managed stack/scratchpad. Still needs some work.
--- a/libcelt/bands.c
+++ b/libcelt/bands.c
@@ -118,6 +118,7 @@
int i;
VARDECL(celt_ener_t *tmpE);
VARDECL(celt_sig_t *freq);
+ SAVE_STACK;
ALLOC(tmpE, m->nbEBands*m->nbChannels, celt_ener_t);
ALLOC(freq, m->nbMdctBlocks*m->nbChannels*m->eBands[m->nbEBands+1], celt_sig_t);
for (i=0;i<m->nbMdctBlocks*m->nbChannels*m->eBands[m->nbEBands+1];i++)
@@ -124,14 +125,17 @@
freq[i] = SHL32(EXTEND32(X[i]), 10);
compute_band_energies(m, freq, tmpE);
normalise_bands(m, freq, X, tmpE);
+ RESTORE_STACK;
}
#else
void renormalise_bands(const CELTMode *m, celt_norm_t *X)
{
VARDECL(celt_ener_t *tmpE);
+ SAVE_STACK;
ALLOC(tmpE, m->nbEBands*m->nbChannels, celt_ener_t);
compute_band_energies(m, X, tmpE);
normalise_bands(m, X, X, tmpE);
+ RESTORE_STACK;
}
#endif
@@ -224,7 +228,8 @@
VARDECL(celt_norm_t *norm);
VARDECL(int *pulses);
VARDECL(int *offsets);
-
+ SAVE_STACK;
+
B = m->nbMdctBlocks*m->nbChannels;
ALLOC(norm, B*eBands[m->nbEBands+1], celt_norm_t);
@@ -277,6 +282,7 @@
}
for (i=B*eBands[m->nbEBands];i<B*eBands[m->nbEBands+1];i++)
X[i] = 0;
+ RESTORE_STACK;
}
/* Decoding of the residual */
@@ -288,7 +294,8 @@
VARDECL(celt_norm_t *norm);
VARDECL(int *pulses);
VARDECL(int *offsets);
-
+ SAVE_STACK;
+
B = m->nbMdctBlocks*m->nbChannels;
ALLOC(norm, B*eBands[m->nbEBands+1], celt_norm_t);
@@ -335,6 +342,7 @@
}
for (i=B*eBands[m->nbEBands];i<B*eBands[m->nbEBands+1];i++)
X[i] = 0;
+ RESTORE_STACK;
}
void stereo_mix(const CELTMode *m, celt_norm_t *X, celt_ener_t *bank, int dir)
--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@@ -33,6 +33,8 @@
#include "config.h"
#endif
+#define CELT_C
+
#include "os_support.h"
#include "mdct.h"
#include <math.h>
@@ -182,6 +184,7 @@
celt_word32_t E = 0;
VARDECL(celt_word32_t *x);
VARDECL(celt_word32_t *tmp);
+ SAVE_STACK;
ALLOC(x, 2*N, celt_word32_t);
ALLOC(tmp, N, celt_word32_t);
for (c=0;c<C;c++)
@@ -200,6 +203,7 @@
out[C*B*j+C*i+c] = tmp[j];
}
}
+ RESTORE_STACK;
return E;
}
@@ -209,6 +213,7 @@
int i, c, N4;
VARDECL(celt_word32_t *x);
VARDECL(celt_word32_t *tmp);
+ SAVE_STACK;
ALLOC(x, 2*N, celt_word32_t);
ALLOC(tmp, N, celt_word32_t);
N4 = (N-overlap)/2;
@@ -231,6 +236,7 @@
mdct_overlap[C*j+c] = x[N+N4+j];
}
}
+ RESTORE_STACK;
}
int celt_encode(CELTEncoder *st, celt_int16_t *pcm, unsigned char *compressed, int nbCompressedBytes)
@@ -245,6 +251,7 @@
VARDECL(celt_norm_t *P);
VARDECL(celt_ener_t *bandE);
VARDECL(celt_pgain_t *gains);
+ SAVE_STACK;
if (check_mode(st->mode) != CELT_OK)
return CELT_INVALID_MODE;
@@ -414,6 +421,7 @@
if (nbBytes > nbCompressedBytes)
{
celt_warning_int ("got too many bytes:", nbBytes);
+ RESTORE_STACK;
return CELT_INTERNAL_ERROR;
}
/*printf ("%d\n", *nbBytes);*/
@@ -427,6 +435,7 @@
ec_byte_reset(&st->buf);
ec_enc_init(&st->enc,&st->buf);
+ RESTORE_STACK;
return nbCompressedBytes;
}
@@ -539,6 +548,7 @@
int i, c, N, B, C;
int pitch_index;
VARDECL(celt_sig_t *freq);
+ SAVE_STACK;
N = st->block_size;
B = st->nb_blocks;
C = st->mode->nbChannels;
@@ -567,6 +577,7 @@
}
}
}
+ RESTORE_STACK;
}
int celt_decode(CELTDecoder *st, unsigned char *data, int len, celt_int16_t *pcm)
@@ -581,6 +592,7 @@
VARDECL(celt_norm_t *P);
VARDECL(celt_ener_t *bandE);
VARDECL(celt_pgain_t *gains);
+ SAVE_STACK;
if (check_mode(st->mode) != CELT_OK)
return CELT_INVALID_MODE;
@@ -596,10 +608,14 @@
ALLOC(gains, st->mode->nbPBands, celt_pgain_t);
if (check_mode(st->mode) != CELT_OK)
+ {
+ RESTORE_STACK;
return CELT_INVALID_MODE;
+ }
if (data == NULL)
{
celt_decode_lost(st, pcm);
+ RESTORE_STACK;
return 0;
}
@@ -676,6 +692,7 @@
if (ec_dec_uint(&dec, 2) != val)
{
celt_warning("decode error");
+ RESTORE_STACK;
return CELT_CORRUPTED_DATA;
}
val = 1-val;
@@ -682,6 +699,7 @@
}
}
+ RESTORE_STACK;
return 0;
/*printf ("\n");*/
}
--- a/libcelt/cwrs.c
+++ b/libcelt/cwrs.c
@@ -111,13 +111,17 @@
celt_uint32_t ncwrs(int _n,int _m)
{
int i;
+ celt_uint32_t ret;
VARDECL(celt_uint32_t *nc);
+ SAVE_STACK;
ALLOC(nc,_n+1, celt_uint32_t);
for (i=0;i<_n+1;i++)
nc[i] = 1;
for (i=0;i<_m;i++)
next_ncwrs32(nc, _n+1, 0);
- return nc[_n];
+ ret = nc[_n];
+ RESTORE_STACK;
+ return ret;
}
/*Returns the numer of ways of choosing _m elements from a set of size _n with
@@ -125,13 +129,17 @@
celt_uint64_t ncwrs64(int _n,int _m)
{
int i;
+ celt_uint64_t ret;
VARDECL(celt_uint64_t *nc);
+ SAVE_STACK;
ALLOC(nc,_n+1, celt_uint64_t);
for (i=0;i<_n+1;i++)
nc[i] = 1;
for (i=0;i<_m;i++)
next_ncwrs64(nc, _n+1, 0);
- return nc[_n];
+ ret = nc[_n];
+ RESTORE_STACK;
+ return ret;
}
@@ -143,6 +151,7 @@
int j;
int k;
VARDECL(celt_uint32_t *nc);
+ SAVE_STACK;
ALLOC(nc,_n+1, celt_uint32_t);
for (j=0;j<_n+1;j++)
nc[j] = 1;
@@ -176,6 +185,7 @@
else
prev_ncwrs32(nc, _n+1, 1);
}
+ RESTORE_STACK;
}
/*Returns the index of the given combination of _m elements chosen from a set
@@ -187,6 +197,7 @@
int j;
int k;
VARDECL(celt_uint32_t *nc);
+ SAVE_STACK;
ALLOC(nc,_n+1, celt_uint32_t);
for (j=0;j<_n+1;j++)
nc[j] = 1;
@@ -218,6 +229,7 @@
}
if((k==0||_x[k]!=_x[k-1])&&_s[k])i+=p>>1;
}
+ RESTORE_STACK;
return i;
}
@@ -229,6 +241,7 @@
int j;
int k;
VARDECL(celt_uint64_t *nc);
+ SAVE_STACK;
ALLOC(nc,_n+1, celt_uint64_t);
for (j=0;j<_n+1;j++)
nc[j] = 1;
@@ -262,6 +275,7 @@
else
prev_ncwrs64(nc, _n+1, 1);
}
+ RESTORE_STACK;
}
/*Returns the index of the given combination of _m elements chosen from a set
@@ -273,6 +287,7 @@
int j;
int k;
VARDECL(celt_uint64_t *nc);
+ SAVE_STACK;
ALLOC(nc,_n+1, celt_uint64_t);
for (j=0;j<_n+1;j++)
nc[j] = 1;
@@ -304,6 +319,7 @@
}
if((k==0||_x[k]!=_x[k-1])&&_s[k])i+=p>>1;
}
+ RESTORE_STACK;
return i;
}
@@ -350,6 +366,7 @@
{
VARDECL(int *comb);
VARDECL(int *signs);
+ SAVE_STACK;
ALLOC(comb, K, int);
ALLOC(signs, K, int);
@@ -366,6 +383,7 @@
id = icwrs64(N, K, comb, signs, &bound);
ec_enc_uint64(enc,id,bound);
}
+ RESTORE_STACK;
}
void decode_pulses(int *_y, int N, int K, ec_dec *dec)
@@ -372,6 +390,7 @@
{
VARDECL(int *comb);
VARDECL(int *signs);
+ SAVE_STACK;
ALLOC(comb, K, int);
ALLOC(signs, K, int);
@@ -383,5 +402,6 @@
cwrsi64(N, K, ec_dec_uint64(dec, ncwrs64(N, K)), comb, signs);
comb2pulse(N, K, _y, comb, signs);
}
+ RESTORE_STACK;
}
--- a/libcelt/mdct.c
+++ b/libcelt/mdct.c
@@ -91,6 +91,7 @@
int i;
int N, N2, N4;
VARDECL(kiss_fft_scalar *f);
+ SAVE_STACK;
N = l->n;
N2 = N/2;
N4 = N/4;
@@ -126,6 +127,7 @@
out[2*i] = -S_MUL(f[2*i+1],l->trig[i+N4]) + S_MUL(f[2*i] ,l->trig[i]);
out[N2-1-2*i] = -S_MUL(f[2*i] ,l->trig[i+N4]) - S_MUL(f[2*i+1],l->trig[i]);
}
+ RESTORE_STACK;
}
@@ -134,6 +136,7 @@
int i;
int N, N2, N4, N8;
VARDECL(kiss_fft_scalar *f);
+ SAVE_STACK;
N = l->n;
N2 = N/2;
N4 = N/4;
@@ -173,6 +176,7 @@
out[i] =-out[N2-i-1];
out[N-i-1] = out[N2+i];
}
+ RESTORE_STACK;
}
--- a/libcelt/pitch.c
+++ b/libcelt/pitch.c
@@ -53,6 +53,7 @@
VARDECL(celt_word32_t *X);
VARDECL(celt_word32_t *Y);
VARDECL(celt_mask_t *curve);
int n2 = lag/2;
+ SAVE_STACK;
ALLOC(xx, lag*C, celt_word32_t);
ALLOC(yy, lag*C, celt_word32_t);
@@ -109,4 +110,5 @@
/*printf ("\n");
printf ("%d %f\n", *pitch, max_corr);
printf ("%d\n", *pitch);*/
+ RESTORE_STACK;
}
--- a/libcelt/psy.c
+++ b/libcelt/psy.c
@@ -126,6 +126,7 @@
{
int i;
VARDECL(float *psd);
int N=len/2;
+ SAVE_STACK;
ALLOC(psd, N, float);
psd[0] = X[0]*1.f*X[0];
@@ -134,7 +135,7 @@
/* TODO: Do tone masking */
/* Noise masking */
spreading_func(decay, psd, mask, N);
-
+ RESTORE_STACK;
}
void compute_mdct_masking(struct PsyDecay *decay, celt_word32_t *X, celt_mask_t *mask, int len)
@@ -141,6 +142,7 @@
{
int i;
VARDECL(float *psd);
+ SAVE_STACK;
ALLOC(psd, len, float);
for (i=0;i<len;i++)
mask[i] = X[i]*X[i];
@@ -152,5 +154,5 @@
/* TODO: Do tone masking */
/* Noise masking */
spreading_func(decay, psd, mask, len);
-
+ RESTORE_STACK;
}
--- a/libcelt/quant_bands.c
+++ b/libcelt/quant_bands.c
@@ -90,6 +90,7 @@
celt_word16_t prev = 0;
celt_word16_t coef = m->ePredCoef;
VARDECL(celt_word16_t *error);
/* The .7 is a heuristic */
celt_word16_t beta = MULT16_16_Q15(QCONST16(.7f,15),coef);
+ SAVE_STACK;
@@ -157,6 +158,7 @@
/*printf ("%d\n", ec_enc_tell(enc, 0)-9);*/
/*printf ("\n");*/
+ RESTORE_STACK;
}
static void unquant_energy_mono(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, ec_dec *dec)
@@ -210,6 +212,7 @@
void quant_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, ec_enc *enc)
{
int C;
+ SAVE_STACK;
C = m->nbChannels;
@@ -263,6 +266,7 @@
celt_fatal("more than 2 channels not supported");
}
#endif
+ RESTORE_STACK;
}
@@ -270,6 +274,7 @@
void unquant_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, ec_dec *dec)
{
int C;
+ SAVE_STACK;
C = m->nbChannels;
if (C==1)
@@ -286,4 +291,5 @@
eBands[C*i+c] = E[i];
}
}
+ RESTORE_STACK;
}
--- a/libcelt/quant_pitch.c
+++ b/libcelt/quant_pitch.c
@@ -66,6 +66,7 @@
{
int i, id;
VARDECL(float *g2);
+ SAVE_STACK;
ALLOC(g2, len, float);
/*for (i=0;i<len;i++) printf ("%f ", gains[i]);printf ("\n");*/
for (i=0;i<len;i++)
@@ -75,6 +76,7 @@
/*for (i=0;i<len;i++) printf ("%f ", pgain_table[id*len+i]);printf ("\n");*/
for (i=0;i<len;i++)
gains[i] = PGAIN_SCALING*(sqrt(1-(1-pgain_table[id*len+i])*(1-pgain_table[id*len+i])));
+ RESTORE_STACK;
return id!=0;
}
--- a/libcelt/rate.c
+++ b/libcelt/rate.c
@@ -192,6 +192,7 @@
int j;
int firstpass;
VARDECL(int *bits);
+ SAVE_STACK;
ALLOC(bits, len, int);
lo = 0;
hi = 1<<BITRES;
@@ -235,14 +236,16 @@
break;
}
}
+ RESTORE_STACK;
return (out+BITROUND) >> BITRES;
}
int compute_allocation(const CELTMode *m, int *offsets, int total, int *pulses)
{
- int lo, hi, len;
+ int lo, hi, len, ret;
VARDECL(int *bits1);
VARDECL(int *bits2);
+ SAVE_STACK;
len = m->nbEBands;
ALLOC(bits1, len, int);
@@ -278,7 +281,9 @@
if (bits2[j] < 0)
bits2[j] = 0;
}
- return interp_bits2pulses(m, bits1, bits2, total, pulses, len);
+ ret = interp_bits2pulses(m, bits1, bits2, total, pulses, len);
+ RESTORE_STACK;
+ return ret;
}
}
--- a/libcelt/stack_alloc.h
+++ b/libcelt/stack_alloc.h
@@ -84,32 +84,52 @@
* @param type Type of element
*/
+
+#if defined(VAR_ARRAYS)
+
+#define VARDECL(var)
+#define ALLOC(var, size, type) type var[size]
+#define SAVE_STACK
+#define RESTORE_STACK
+
+#elif defined(USE_ALLOCA)
+
+#define VARDECL(var) var
+#define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size)))
+#define SAVE_STACK
+#define RESTORE_STACK
+
+#else
+
#ifdef ENABLE_VALGRIND
#include <valgrind/memcheck.h>
+#define ALLOC_STACK(stack) (stack = (stack==0) ? celt_alloc_scratch(30000) : stack, VALGRIND_MAKE_NOACCESS(stack, 1000))
#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
#define PUSH(stack, size, type) (VALGRIND_MAKE_NOACCESS(stack, 1000),ALIGN((stack),sizeof(type)),VALGRIND_MAKE_WRITABLE(stack, ((size)*sizeof(type))),(stack)+=((size)*sizeof(type)),(type*)((stack)-((size)*sizeof(type))))
+#define RESTORE_STACK ((global_stack = _saved_stack),VALGRIND_MAKE_NOACCESS(global_stack, 1000))
#else
+#define ALLOC_STACK(stack) (stack = (stack==0) ? celt_alloc_scratch(30000) : stack)
#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
-
#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)),(stack)+=((size)*sizeof(type)),(type*)((stack)-((size)*sizeof(type))))
+#define RESTORE_STACK (global_stack = _saved_stack)
#endif
-#if defined(VAR_ARRAYS)
-#define VARDECL(var)
-#define ALLOC(var, size, type) type var[size]
-#elif defined(USE_ALLOCA)
-#define VARDECL(var) var
-#define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size)))
+#ifdef CELT_C
+char *global_stack=0;
#else
-/*#define VARDECL(var) var
-#define ALLOC(var, size, type) var = PUSH(stack, size, type)*/
-#error scratchpad not yet supported, you need to define either VAR_ARRAYS or USE_ALLOCA
+extern char *global_stack;
+#endif
+
+#include "os_support.h"
+#define VARDECL(var) var
+#define ALLOC(var, size, type) var = PUSH(global_stack, size, type)
+#define SAVE_STACK char *_saved_stack; ALLOC_STACK(global_stack);_saved_stack = global_stack;
#endif
--- a/libcelt/testcelt.c
+++ b/libcelt/testcelt.c
@@ -106,6 +106,7 @@
{
VARDECL(celt_int16_t *in);
VARDECL(celt_int16_t *out);
+ SAVE_STACK;
ALLOC(in, frame_size*channels, celt_int16_t);
ALLOC(out, frame_size*channels, celt_int16_t);
fread(in, sizeof(short), frame_size*channels, fin);
@@ -115,6 +116,7 @@
if (len <= 0)
{
fprintf (stderr, "celt_encode() returned %d\n", len);
+ RESTORE_STACK;
return 1;
}
/* This is to simulate packet loss */
@@ -132,6 +134,7 @@
count++;
fwrite(out, sizeof(short), (frame_size-skip)*channels, fout);
skip = 0;
+ RESTORE_STACK;
}
celt_encoder_destroy(enc);
celt_decoder_destroy(dec);
--- a/libcelt/vq.c
+++ b/libcelt/vq.c
@@ -69,6 +69,7 @@
celt_word32_t Ryp, Ryy, Rpp;
celt_word32_t g;
VARDECL(celt_norm_t *y);
#ifdef FIXED_POINT
int yshift = 14-EC_ILOG(K);
#endif
+ SAVE_STACK;
@@ -104,6 +105,7 @@
for (i=0;i<N;i++)
X[i] = P[i] + MULT16_32_Q14(y[i], g);
+ RESTORE_STACK;
}
/** All the info necessary to keep track of a hypothesis during the search */
@@ -128,6 +130,7 @@
VARDECL(celt_norm_t **ny);
VARDECL(int **iy);
VARDECL(int **iny);
+ SAVE_STACK;
int i, j, k, m;
int pulsesLeft;
VARDECL(celt_word32_t *xy);
@@ -342,6 +345,7 @@
due to the recursive computation used in quantisation.
Not quite sure whether we need that or not */
mix_pitch_and_residual(iy[0], X, N, K, P, alpha);
+ RESTORE_STACK;
}
/** Decode pulse vector and combine the result with the pitch vector to produce
@@ -349,9 +353,11 @@
void alg_unquant(celt_norm_t *X, int N, int K, celt_norm_t *P, celt_word16_t alpha, ec_dec *dec)
{
VARDECL(int *iy);
+ SAVE_STACK;
ALLOC(iy, N, int);
decode_pulses(iy, N, K, dec);
mix_pitch_and_residual(iy, X, N, K, P, alpha);
+ RESTORE_STACK;
}