shithub: opus

Download patch

ref: 8600f69f796f48c359439577d092293e167be58e
parent: 8b2a59235f17cdcd88780f851ba90398c1208315
author: Jean-Marc Valin <[email protected]>
date: Fri Feb 29 10:14:12 EST 2008

Initial support for a managed stack/scratchpad. Still needs some work.

--- a/libcelt/bands.c
+++ b/libcelt/bands.c
@@ -118,6 +118,7 @@
    int i;
    VARDECL(celt_ener_t *tmpE);
    VARDECL(celt_sig_t *freq);
+   SAVE_STACK;
    ALLOC(tmpE, m->nbEBands*m->nbChannels, celt_ener_t);
    ALLOC(freq, m->nbMdctBlocks*m->nbChannels*m->eBands[m->nbEBands+1], celt_sig_t);
    for (i=0;i<m->nbMdctBlocks*m->nbChannels*m->eBands[m->nbEBands+1];i++)
@@ -124,14 +125,17 @@
       freq[i] = SHL32(EXTEND32(X[i]), 10);
    compute_band_energies(m, freq, tmpE);
    normalise_bands(m, freq, X, tmpE);
+   RESTORE_STACK;
 }
 #else
 void renormalise_bands(const CELTMode *m, celt_norm_t *X)
 {
    VARDECL(celt_ener_t *tmpE);
+   SAVE_STACK; /* scratchpad build: record global_stack before tmpE is carved out of it */
    ALLOC(tmpE, m->nbEBands*m->nbChannels, celt_ener_t);
    compute_band_energies(m, X, tmpE);
    normalise_bands(m, X, X, tmpE);
+   RESTORE_STACK; /* scratchpad build: reset global_stack to the recorded position, releasing tmpE */
 }
 #endif
 
@@ -224,7 +228,8 @@
    VARDECL(celt_norm_t *norm);
    VARDECL(int *pulses);
    VARDECL(int *offsets);
-   
+   SAVE_STACK;
+
    B = m->nbMdctBlocks*m->nbChannels;
    
    ALLOC(norm, B*eBands[m->nbEBands+1], celt_norm_t);
@@ -277,6 +282,7 @@
    }
    for (i=B*eBands[m->nbEBands];i<B*eBands[m->nbEBands+1];i++)
       X[i] = 0;
+   RESTORE_STACK;
 }
 
 /* Decoding of the residual */
@@ -288,7 +294,8 @@
    VARDECL(celt_norm_t *norm);
    VARDECL(int *pulses);
    VARDECL(int *offsets);
-   
+   SAVE_STACK;
+
    B = m->nbMdctBlocks*m->nbChannels;
    
    ALLOC(norm, B*eBands[m->nbEBands+1], celt_norm_t);
@@ -335,6 +342,7 @@
    }
    for (i=B*eBands[m->nbEBands];i<B*eBands[m->nbEBands+1];i++)
       X[i] = 0;
+   RESTORE_STACK;
 }
 
 void stereo_mix(const CELTMode *m, celt_norm_t *X, celt_ener_t *bank, int dir)
--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@@ -33,6 +33,8 @@
 #include "config.h"
 #endif
 
+#define CELT_C
+
 #include "os_support.h"
 #include "mdct.h"
 #include <math.h>
@@ -182,6 +184,7 @@
    celt_word32_t E = 0;
    VARDECL(celt_word32_t *x);
    VARDECL(celt_word32_t *tmp);
+   SAVE_STACK;
    ALLOC(x, 2*N, celt_word32_t);
    ALLOC(tmp, N, celt_word32_t);
    for (c=0;c<C;c++)
@@ -200,6 +203,7 @@
             out[C*B*j+C*i+c] = tmp[j];
       }
    }
+   RESTORE_STACK;
    return E;
 }
 
@@ -209,6 +213,7 @@
    int i, c, N4;
    VARDECL(celt_word32_t *x);
    VARDECL(celt_word32_t *tmp);
+   SAVE_STACK;
    ALLOC(x, 2*N, celt_word32_t);
    ALLOC(tmp, N, celt_word32_t);
    N4 = (N-overlap)/2;
@@ -231,6 +236,7 @@
             mdct_overlap[C*j+c] = x[N+N4+j];
       }
    }
+   RESTORE_STACK;
 }
 
 int celt_encode(CELTEncoder *st, celt_int16_t *pcm, unsigned char *compressed, int nbCompressedBytes)
@@ -245,6 +251,7 @@
    VARDECL(celt_norm_t *P);
    VARDECL(celt_ener_t *bandE);
    VARDECL(celt_pgain_t *gains);
+   SAVE_STACK;
 
    if (check_mode(st->mode) != CELT_OK)
       return CELT_INVALID_MODE;
@@ -414,6 +421,7 @@
       if (nbBytes > nbCompressedBytes)
       {
          celt_warning_int ("got too many bytes:", nbBytes);
+         RESTORE_STACK;
          return CELT_INTERNAL_ERROR;
       }
       /*printf ("%d\n", *nbBytes);*/
@@ -427,6 +435,7 @@
    ec_byte_reset(&st->buf);
    ec_enc_init(&st->enc,&st->buf);
 
+   RESTORE_STACK;
    return nbCompressedBytes;
 }
 
@@ -539,6 +548,7 @@
    int i, c, N, B, C;
    int pitch_index;
    VARDECL(celt_sig_t *freq);
+   SAVE_STACK;
    N = st->block_size;
    B = st->nb_blocks;
    C = st->mode->nbChannels;
@@ -567,6 +577,7 @@
          }
       }
    }
+   RESTORE_STACK;
 }
 
 int celt_decode(CELTDecoder *st, unsigned char *data, int len, celt_int16_t *pcm)
@@ -581,6 +592,7 @@
    VARDECL(celt_norm_t *P);
    VARDECL(celt_ener_t *bandE);
    VARDECL(celt_pgain_t *gains);
+   SAVE_STACK;
 
    if (check_mode(st->mode) != CELT_OK)
       return CELT_INVALID_MODE;
@@ -596,10 +608,14 @@
    ALLOC(gains, st->mode->nbPBands, celt_pgain_t);
    
    if (check_mode(st->mode) != CELT_OK)
+   {
+      RESTORE_STACK;
       return CELT_INVALID_MODE;
+   }
    if (data == NULL)
    {
       celt_decode_lost(st, pcm);
+      RESTORE_STACK;
       return 0;
    }
    
@@ -676,6 +692,7 @@
          if (ec_dec_uint(&dec, 2) != val)
          {
             celt_warning("decode error");
+            RESTORE_STACK;
             return CELT_CORRUPTED_DATA;
          }
          val = 1-val;
@@ -682,6 +699,7 @@
       }
    }
 
+   RESTORE_STACK;
    return 0;
    /*printf ("\n");*/
 }
--- a/libcelt/cwrs.c
+++ b/libcelt/cwrs.c
@@ -111,13 +111,17 @@
 celt_uint32_t ncwrs(int _n,int _m)
 {
    int i;
+   celt_uint32_t ret; /* holds nc[_n] so it can be returned after the stack is restored */
    VARDECL(celt_uint32_t *nc);
+   SAVE_STACK;
    ALLOC(nc,_n+1, celt_uint32_t);
    for (i=0;i<_n+1;i++)
       nc[i] = 1;
    for (i=0;i<_m;i++)
       next_ncwrs32(nc, _n+1, 0);
-   return nc[_n];
+   ret = nc[_n]; /* read before RESTORE_STACK: in the scratchpad build nc's space is given back there */
+   RESTORE_STACK;
+   return ret;
 }
 
 /*Returns the numer of ways of choosing _m elements from a set of size _n with
@@ -125,13 +129,17 @@
 celt_uint64_t ncwrs64(int _n,int _m)
 {
    int i;
+   celt_uint64_t ret; /* holds nc[_n] so it can be returned after the stack is restored */
    VARDECL(celt_uint64_t *nc);
+   SAVE_STACK;
    ALLOC(nc,_n+1, celt_uint64_t);
    for (i=0;i<_n+1;i++)
       nc[i] = 1;
    for (i=0;i<_m;i++)
       next_ncwrs64(nc, _n+1, 0);
-   return nc[_n];
+   ret = nc[_n]; /* read before RESTORE_STACK: in the scratchpad build nc's space is given back there */
+   RESTORE_STACK;
+   return ret;
 }
 
 
@@ -143,6 +151,7 @@
   int j;
   int k;
   VARDECL(celt_uint32_t *nc);
+  SAVE_STACK;
   ALLOC(nc,_n+1, celt_uint32_t);
   for (j=0;j<_n+1;j++)
     nc[j] = 1;
@@ -176,6 +185,7 @@
     else
       prev_ncwrs32(nc, _n+1, 1);
   }
+  RESTORE_STACK;
 }
 
 /*Returns the index of the given combination of _m elements chosen from a set
@@ -187,6 +197,7 @@
   int      j;
   int      k;
   VARDECL(celt_uint32_t *nc);
+  SAVE_STACK;
   ALLOC(nc,_n+1, celt_uint32_t);
   for (j=0;j<_n+1;j++)
     nc[j] = 1;
@@ -218,6 +229,7 @@
     }
     if((k==0||_x[k]!=_x[k-1])&&_s[k])i+=p>>1;
   }
+  RESTORE_STACK;
   return i;
 }
 
@@ -229,6 +241,7 @@
   int j;
   int k;
   VARDECL(celt_uint64_t *nc);
+  SAVE_STACK;
   ALLOC(nc,_n+1, celt_uint64_t);
   for (j=0;j<_n+1;j++)
     nc[j] = 1;
@@ -262,6 +275,7 @@
     else
       prev_ncwrs64(nc, _n+1, 1);
   }
+  RESTORE_STACK;
 }
 
 /*Returns the index of the given combination of _m elements chosen from a set
@@ -273,6 +287,7 @@
   int           j;
   int           k;
   VARDECL(celt_uint64_t *nc);
+  SAVE_STACK;
   ALLOC(nc,_n+1, celt_uint64_t);
   for (j=0;j<_n+1;j++)
     nc[j] = 1;
@@ -304,6 +319,7 @@
     }
     if((k==0||_x[k]!=_x[k-1])&&_s[k])i+=p>>1;
   }
+  RESTORE_STACK;
   return i;
 }
 
@@ -350,6 +366,7 @@
 {
    VARDECL(int *comb);
    VARDECL(int *signs);
+   SAVE_STACK;
    
    ALLOC(comb, K, int);
    ALLOC(signs, K, int);
@@ -366,6 +383,7 @@
       id = icwrs64(N, K, comb, signs, &bound);
       ec_enc_uint64(enc,id,bound);
    }
+   RESTORE_STACK;
 }
 
 void decode_pulses(int *_y, int N, int K, ec_dec *dec)
@@ -372,6 +390,7 @@
 {
    VARDECL(int *comb);
    VARDECL(int *signs);
+   SAVE_STACK;
    
    ALLOC(comb, K, int);
    ALLOC(signs, K, int);
@@ -383,5 +402,6 @@
       cwrsi64(N, K, ec_dec_uint64(dec, ncwrs64(N, K)), comb, signs);
       comb2pulse(N, K, _y, comb, signs);
    }
+   RESTORE_STACK;
 }
 
--- a/libcelt/mdct.c
+++ b/libcelt/mdct.c
@@ -91,6 +91,7 @@
    int i;
    int N, N2, N4;
    VARDECL(kiss_fft_scalar *f);
+   SAVE_STACK;
    N = l->n;
    N2 = N/2;
    N4 = N/4;
@@ -126,6 +127,7 @@
       out[2*i]      = -S_MUL(f[2*i+1],l->trig[i+N4]) + S_MUL(f[2*i]  ,l->trig[i]);
       out[N2-1-2*i] = -S_MUL(f[2*i]  ,l->trig[i+N4]) - S_MUL(f[2*i+1],l->trig[i]);
    }
+   RESTORE_STACK;
 }
 
 
@@ -134,6 +136,7 @@
    int i;
    int N, N2, N4, N8;
    VARDECL(kiss_fft_scalar *f);
+   SAVE_STACK;
    N = l->n;
    N2 = N/2;
    N4 = N/4;
@@ -173,6 +176,7 @@
       out[i]     =-out[N2-i-1];
       out[N-i-1] = out[N2+i];
    }
+   RESTORE_STACK;
 }
 
 
--- a/libcelt/pitch.c
+++ b/libcelt/pitch.c
@@ -53,6 +53,7 @@
    VARDECL(celt_word32_t *X);
    VARDECL(celt_word32_t *Y);
    VARDECL(celt_mask_t *curve);
+   SAVE_STACK;
    int n2 = lag/2;
    ALLOC(xx, lag*C, celt_word32_t);
    ALLOC(yy, lag*C, celt_word32_t);
@@ -109,4 +110,5 @@
    /*printf ("\n");
    printf ("%d %f\n", *pitch, max_corr);
    printf ("%d\n", *pitch);*/
+   RESTORE_STACK;
 }
--- a/libcelt/psy.c
+++ b/libcelt/psy.c
@@ -126,6 +126,7 @@
 {
    int i;
    VARDECL(float *psd);
+   SAVE_STACK;
    int N=len/2;
    ALLOC(psd, N, float);
    psd[0] = X[0]*1.f*X[0];
@@ -134,7 +135,7 @@
    /* TODO: Do tone masking */
    /* Noise masking */
    spreading_func(decay, psd, mask, N);
-   
+   RESTORE_STACK;  
 }
 
 void compute_mdct_masking(struct PsyDecay *decay, celt_word32_t *X, celt_mask_t *mask, int len)
@@ -141,6 +142,7 @@
 {
    int i;
    VARDECL(float *psd);
+   SAVE_STACK;
    ALLOC(psd, len, float);
    for (i=0;i<len;i++)
       mask[i] = X[i]*X[i];
@@ -152,5 +154,5 @@
    /* TODO: Do tone masking */
    /* Noise masking */
    spreading_func(decay, psd, mask, len);
-   
+   RESTORE_STACK;  
 }
--- a/libcelt/quant_bands.c
+++ b/libcelt/quant_bands.c
@@ -90,6 +90,7 @@
    celt_word16_t prev = 0;
    celt_word16_t coef = m->ePredCoef;
    VARDECL(celt_word16_t *error);
+   SAVE_STACK;
    /* The .7 is a heuristic */
    celt_word16_t beta = MULT16_16_Q15(QCONST16(.7f,15),coef);
    
@@ -157,6 +158,7 @@
    /*printf ("%d\n", ec_enc_tell(enc, 0)-9);*/
 
    /*printf ("\n");*/
+   RESTORE_STACK;
 }
 
 static void unquant_energy_mono(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, ec_dec *dec)
@@ -210,6 +212,7 @@
 void quant_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, ec_enc *enc)
 {
    int C;
+   SAVE_STACK;
    
    C = m->nbChannels;
 
@@ -263,6 +266,7 @@
       celt_fatal("more than 2 channels not supported");
    }
 #endif
+   RESTORE_STACK;
 }
 
 
@@ -270,6 +274,7 @@
 void unquant_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, ec_dec *dec)
 {
    int C;   
+   SAVE_STACK;
    C = m->nbChannels;
 
    if (C==1)
@@ -286,4 +291,5 @@
             eBands[C*i+c] = E[i];
       }
    }
+   RESTORE_STACK;
 }
--- a/libcelt/quant_pitch.c
+++ b/libcelt/quant_pitch.c
@@ -66,6 +66,7 @@
 {
    int i, id;
    VARDECL(float *g2);
+   SAVE_STACK;
    ALLOC(g2, len, float);
    /*for (i=0;i<len;i++) printf ("%f ", gains[i]);printf ("\n");*/
    for (i=0;i<len;i++)
@@ -75,6 +76,7 @@
    /*for (i=0;i<len;i++) printf ("%f ", pgain_table[id*len+i]);printf ("\n");*/
    for (i=0;i<len;i++)
       gains[i] = PGAIN_SCALING*(sqrt(1-(1-pgain_table[id*len+i])*(1-pgain_table[id*len+i])));
+   RESTORE_STACK;
    return id!=0;
 }
 
--- a/libcelt/rate.c
+++ b/libcelt/rate.c
@@ -192,6 +192,7 @@
    int j;
    int firstpass;
    VARDECL(int *bits);
+   SAVE_STACK;
    ALLOC(bits, len, int);
    lo = 0;
    hi = 1<<BITRES;
@@ -235,14 +236,16 @@
             break;
       }
    }
+   RESTORE_STACK;
    return (out+BITROUND) >> BITRES;
 }
 
 int compute_allocation(const CELTMode *m, int *offsets, int total, int *pulses)
 {
-   int lo, hi, len;
+   int lo, hi, len, ret;
    VARDECL(int *bits1);
    VARDECL(int *bits2);
+   SAVE_STACK;
    
    len = m->nbEBands;
    ALLOC(bits1, len, int);
@@ -278,7 +281,9 @@
          if (bits2[j] < 0)
             bits2[j] = 0;
       }
-      return interp_bits2pulses(m, bits1, bits2, total, pulses, len);
+      ret = interp_bits2pulses(m, bits1, bits2, total, pulses, len);
+      RESTORE_STACK;
+      return ret;
    }
 }
 
--- a/libcelt/stack_alloc.h
+++ b/libcelt/stack_alloc.h
@@ -84,32 +84,52 @@
  * @param type Type of element
  */
 
+
+#if defined(VAR_ARRAYS)
+
+#define VARDECL(var) 
+#define ALLOC(var, size, type) type var[size]
+#define SAVE_STACK
+#define RESTORE_STACK
+
+#elif defined(USE_ALLOCA)
+
+#define VARDECL(var) var
+#define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size)))
+#define SAVE_STACK
+#define RESTORE_STACK
+
+#else
+
 #ifdef ENABLE_VALGRIND
 
 #include <valgrind/memcheck.h>
 
+#define ALLOC_STACK(stack) (stack = (stack==0) ? celt_alloc_scratch(30000) : stack, VALGRIND_MAKE_NOACCESS(stack, 1000))
 #define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
 
 #define PUSH(stack, size, type) (VALGRIND_MAKE_NOACCESS(stack, 1000),ALIGN((stack),sizeof(type)),VALGRIND_MAKE_WRITABLE(stack, ((size)*sizeof(type))),(stack)+=((size)*sizeof(type)),(type*)((stack)-((size)*sizeof(type))))
+#define RESTORE_STACK ((global_stack = _saved_stack),VALGRIND_MAKE_NOACCESS(global_stack, 1000))
 
 #else
 
+#define ALLOC_STACK(stack) (stack = (stack==0) ? celt_alloc_scratch(30000) : stack)
 #define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
-
 #define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)),(stack)+=((size)*sizeof(type)),(type*)((stack)-((size)*sizeof(type))))
+#define RESTORE_STACK (global_stack = _saved_stack)
 
 #endif
 
-#if defined(VAR_ARRAYS)
-#define VARDECL(var) 
-#define ALLOC(var, size, type) type var[size]
-#elif defined(USE_ALLOCA)
-#define VARDECL(var) var
-#define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size)))
+#ifdef CELT_C
+char *global_stack=0;
 #else
-/*#define VARDECL(var) var
-#define ALLOC(var, size, type) var = PUSH(stack, size, type)*/
-#error scratchpad not yet supported, you need to define either VAR_ARRAYS or USE_ALLOCA
+extern char *global_stack;
+#endif
+
+#include "os_support.h"
+#define VARDECL(var) var
+#define ALLOC(var, size, type) var = PUSH(global_stack, size, type)
+#define SAVE_STACK char *_saved_stack = (ALLOC_STACK(global_stack), global_stack) /* one declaration, no statements, so "SAVE_STACK;" may precede other declarations; allocates the scratchpad on first use, then records the current top */
 #endif
 
 
--- a/libcelt/testcelt.c
+++ b/libcelt/testcelt.c
@@ -106,6 +106,7 @@
    {
       VARDECL(celt_int16_t *in);
       VARDECL(celt_int16_t *out);
+      SAVE_STACK;
       ALLOC(in, frame_size*channels, celt_int16_t);
       ALLOC(out, frame_size*channels, celt_int16_t);
       fread(in, sizeof(short), frame_size*channels, fin);
@@ -115,6 +116,7 @@
       if (len <= 0)
       {
          fprintf (stderr, "celt_encode() returned %d\n", len);
+         RESTORE_STACK;
          return 1;
       }
       /* This is to simulate packet loss */
@@ -132,6 +134,7 @@
       count++;
       fwrite(out, sizeof(short), (frame_size-skip)*channels, fout);
       skip = 0;
+      RESTORE_STACK;
    }
    celt_encoder_destroy(enc);
    celt_decoder_destroy(dec);
--- a/libcelt/vq.c
+++ b/libcelt/vq.c
@@ -69,6 +69,7 @@
    celt_word32_t Ryp, Ryy, Rpp;
    celt_word32_t g;
    VARDECL(celt_norm_t *y);
+   SAVE_STACK;
 #ifdef FIXED_POINT
    int yshift = 14-EC_ILOG(K);
 #endif
@@ -104,6 +105,7 @@
 
    for (i=0;i<N;i++)
       X[i] = P[i] + MULT16_32_Q14(y[i], g);
+   RESTORE_STACK;
 }
 
 /** All the info necessary to keep track of a hypothesis during the search */
@@ -128,6 +130,7 @@
    VARDECL(celt_norm_t **ny);
    VARDECL(int **iy);
    VARDECL(int **iny);
+   SAVE_STACK;
    int i, j, k, m;
    int pulsesLeft;
    VARDECL(celt_word32_t *xy);
@@ -342,6 +345,7 @@
       due to the recursive computation used in quantisation.
       Not quite sure whether we need that or not */
    mix_pitch_and_residual(iy[0], X, N, K, P, alpha);
+   RESTORE_STACK;
 }
 
 /** Decode pulse vector and combine the result with the pitch vector to produce
@@ -349,9 +353,11 @@
 void alg_unquant(celt_norm_t *X, int N, int K, celt_norm_t *P, celt_word16_t alpha, ec_dec *dec)
 {
    VARDECL(int *iy);
+   SAVE_STACK; /* scratchpad build: record global_stack before iy is carved out of it */
    ALLOC(iy, N, int);
    decode_pulses(iy, N, K, dec);
    mix_pitch_and_residual(iy, X, N, K, P, alpha);
+   RESTORE_STACK; /* scratchpad build: reset global_stack to the recorded position, releasing iy */
 }