shithub: opus

--- a/celt/celt_decoder.c

+++ b/celt/celt_decoder.c

@@ -424,10 +424,12 @@

       opus_val16 fade = Q15ONE;

       int pitch_index;

       VARDECL(opus_val32, etmp);

+      VARDECL(opus_val16, exc);

       if (loss_count == 0)

-         opus_val16 lp_pitch_buf[DECODE_BUFFER_SIZE>>1];

+         VARDECL( opus_val16, lp_pitch_buf );

+         ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );

          pitch_downsample(decode_mem, lp_pitch_buf, DECODE_BUFFER_SIZE, C);

          pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,

                DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,

@@ -440,14 +442,12 @@

       ALLOC(etmp, overlap, opus_val32);

+      ALLOC(exc, MAX_PERIOD, opus_val16);

       window = mode->window;

       c=0; do {

-         opus_val16 exc[MAX_PERIOD];

-         opus_val32 ac[LPC_ORDER+1];

          opus_val16 decay;

          opus_val16 attenuation;

          opus_val32 S1=0;

-         opus_val16 lpc_mem[LPC_ORDER];

          celt_sig *buf;

          int extrapolation_offset;

          int extrapolation_len;

@@ -461,6 +461,7 @@

          if (loss_count == 0)

+            opus_val32 ac[LPC_ORDER+1];

             /* Compute LPC coefficients for the last MAX_PERIOD samples before

                the first loss so we can work in the excitation-filter domain. */

             _celt_autocorr(exc, ac, window, overlap, LPC_ORDER, MAX_PERIOD);

@@ -487,14 +488,17 @@

          exc_length = IMIN(2*pitch_index, MAX_PERIOD);

          /* Initialize the LPC history with the samples just before the start

             of the region for which we're computing the excitation. */

-         for (i=0;i<LPC_ORDER;i++)

-            lpc_mem[i] =

-                  ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);

+            opus_val16 lpc_mem[LPC_ORDER];

+            for (i=0;i<LPC_ORDER;i++)

+            {

+               lpc_mem[i] =

+                     ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);

+            }

+            /* Compute the excitation for exc_length samples before the loss. */

+            celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,

+                  exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);

-         /* Compute the excitation for exc_length samples before the loss. */

-         celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,

-               exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);

          /* Check if the waveform is decaying, and if so how fast.

             We do this to avoid adding energy when concealing in a segment

@@ -547,14 +551,18 @@

             S1 += SHR32(MULT16_16(tmp, tmp), 8);

-         /* Copy the last decoded samples (prior to the overlap region) to

-            synthesis filter memory so we can have a continuous signal. */

-         for (i=0;i<LPC_ORDER;i++)

-            lpc_mem[i] = ROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);

-         /* Apply the synthesis filter to convert the excitation back into the

-            signal domain. */

-         celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,

-               buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER, lpc_mem);

+         {

+            opus_val16 lpc_mem[LPC_ORDER];

+            /* Copy the last decoded samples (prior to the overlap region) to

+               synthesis filter memory so we can have a continuous signal. */

+            for (i=0;i<LPC_ORDER;i++)

+               lpc_mem[i] = ROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);

+            /* Apply the synthesis filter to convert the excitation back into

+               the signal domain. */

+            celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,

+                  buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,

+                  lpc_mem);

+         }

          /* Check if the synthesis energy is higher than expected, which can

             happen with the signal changes during our window. If so,

--- a/silk/CNG.c

+++ b/silk/CNG.c

@@ -30,6 +30,7 @@

 #endif

 #include "main.h"

+#include "stack_alloc.h"

 /* Generates excitation for CNG LPC synthesis */

 static inline void silk_CNG_exc(

@@ -86,8 +87,8 @@

     opus_int   i, subfr;

     opus_int32 sum_Q6, max_Gain_Q16;

     opus_int16 A_Q12[ MAX_LPC_ORDER ];

-    opus_int32 CNG_sig_Q10[ MAX_FRAME_LENGTH + MAX_LPC_ORDER ];

     silk_CNG_struct *psCNG = &psDec->sCNG;

+    SAVE_STACK;

     if( psDec->fs_kHz != psCNG->fs_kHz ) {

         /* Reset state */

@@ -123,7 +124,10 @@

     /* Add CNG when packet is lost or during DTX */

     if( psDec->lossCnt ) {

+        VARDECL( opus_int32, CNG_sig_Q10 );

+        ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 );

         /* Generate CNG excitation */

         silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed );

@@ -164,4 +168,5 @@

     } else {

         silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order *  sizeof( opus_int32 ) );

+    RESTORE_STACK;

--- a/silk/NLSF_encode.c

+++ b/silk/NLSF_encode.c

@@ -30,6 +30,7 @@

 #endif

 #include "main.h"

+#include "stack_alloc.h"

 /***********************/

 /* NLSF vector encoder */

@@ -46,10 +47,10 @@

     opus_int         i, s, ind1, bestIndex, prob_Q8, bits_q7;

     opus_int32       W_tmp_Q9;

-    opus_int32       err_Q26[      NLSF_VQ_MAX_VECTORS ];

-    opus_int32       RD_Q25[       NLSF_VQ_MAX_SURVIVORS ];

-    opus_int         tempIndices1[ NLSF_VQ_MAX_SURVIVORS ];

-    opus_int8        tempIndices2[ NLSF_VQ_MAX_SURVIVORS * MAX_LPC_ORDER ];

+    VARDECL( opus_int32, err_Q26 );

+    VARDECL( opus_int32, RD_Q25 );

+    VARDECL( opus_int, tempIndices1 );

+    VARDECL( opus_int8, tempIndices2 );

     opus_int16       res_Q15[      MAX_LPC_ORDER ];

     opus_int16       res_Q10[      MAX_LPC_ORDER ];

     opus_int16       NLSF_tmp_Q15[ MAX_LPC_ORDER ];

@@ -58,6 +59,7 @@

     opus_uint8       pred_Q8[      MAX_LPC_ORDER ];

     opus_int16       ec_ix[        MAX_LPC_ORDER ];

     const opus_uint8 *pCB_element, *iCDF_ptr;

+    SAVE_STACK;

     silk_assert( nSurvivors <= NLSF_VQ_MAX_SURVIVORS );

     silk_assert( signalType >= 0 && signalType <= 2 );

@@ -67,11 +69,16 @@

     silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order );

     /* First stage: VQ */

+    ALLOC( err_Q26, psNLSF_CB->nVectors, opus_int32 );

     silk_NLSF_VQ( err_Q26, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->nVectors, psNLSF_CB->order );

     /* Sort the quantization errors */

+    ALLOC( tempIndices1, nSurvivors, opus_int );

     silk_insertion_sort_increasing( err_Q26, tempIndices1, psNLSF_CB->nVectors, nSurvivors );

+    ALLOC( RD_Q25, nSurvivors, opus_int32 );

+    ALLOC( tempIndices2, nSurvivors * MAX_LPC_ORDER, opus_int8 );

     /* Loop over survivors */

     for( s = 0; s < nSurvivors; s++ ) {

         ind1 = tempIndices1[ s ];

@@ -125,4 +132,5 @@

     silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB );

     return RD_Q25[ 0 ];

+    RESTORE_STACK;

--- a/silk/NSQ.c

+++ b/silk/NSQ.c

@@ -30,6 +30,7 @@

 #endif

 #include "main.h"

+#include "stack_alloc.h"

 static inline void silk_nsq_scale_states(

     const silk_encoder_state *psEncC,           /* I    Encoder State                   */

@@ -88,11 +89,12 @@

     opus_int            k, lag, start_idx, LSF_interpolation_flag;

     const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;

     opus_int16          *pxq;

-    opus_int32          sLTP_Q15[ 2 * MAX_FRAME_LENGTH ];

-    opus_int16          sLTP[     2 * MAX_FRAME_LENGTH ];

+    VARDECL( opus_int32, sLTP_Q15 );

+    VARDECL( opus_int16, sLTP );

     opus_int32          HarmShapeFIRPacked_Q14;

     opus_int            offset_Q10;

-    opus_int32          x_sc_Q10[ MAX_SUB_FRAME_LENGTH ];

+    VARDECL( opus_int32, x_sc_Q10 );

+    SAVE_STACK;

     NSQ->rand_seed = psIndices->Seed;

@@ -109,6 +111,10 @@

         LSF_interpolation_flag = 1;

+    ALLOC( sLTP_Q15,

+           psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );

+    ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );

+    ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );

     /* Set up pointers to start of sub frame */

     NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;

     NSQ->sLTP_buf_idx     = psEncC->ltp_mem_length;

@@ -160,6 +166,7 @@

     /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */

     silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );

     silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );

+    RESTORE_STACK;

 /***********************************/

--- a/silk/NSQ_del_dec.c

+++ b/silk/NSQ_del_dec.c

@@ -30,6 +30,7 @@

 #endif

 #include "main.h"

+#include "stack_alloc.h"

 typedef struct {

     opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];

@@ -54,6 +55,8 @@

     opus_int32 LPC_exc_Q14;

 } NSQ_sample_struct;

+typedef NSQ_sample_struct  NSQ_sample_pair[ 2 ];

 static inline void silk_nsq_del_dec_scale_states(

     const silk_encoder_state *psEncC,               /* I    Encoder State                       */

     silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */

@@ -123,17 +126,18 @@

     opus_int            i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr;

     opus_int            last_smple_idx, smpl_buf_idx, decisionDelay;

-    const opus_int16 	*A_Q12, *B_Q14, *AR_shp_Q13;

+    const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;

     opus_int16          *pxq;

-    opus_int32          sLTP_Q15[ 2 * MAX_FRAME_LENGTH ];

-    opus_int16          sLTP[     2 * MAX_FRAME_LENGTH ];

+    VARDECL( opus_int32, sLTP_Q15 );

+    VARDECL( opus_int16, sLTP );

     opus_int32          HarmShapeFIRPacked_Q14;

     opus_int            offset_Q10;

     opus_int32          RDmin_Q10, Gain_Q10;

-    opus_int32          x_sc_Q10[ MAX_SUB_FRAME_LENGTH ];

-    opus_int32          delayedGain_Q10[  DECISION_DELAY ];

-    NSQ_del_dec_struct  psDelDec[ MAX_DEL_DEC_STATES ];

+    VARDECL( opus_int32, x_sc_Q10 );

+    VARDECL( opus_int32, delayedGain_Q10 );

+    VARDECL( NSQ_del_dec_struct, psDelDec );

     NSQ_del_dec_struct  *psDD;

+    SAVE_STACK;

     /* Set unvoiced lag to the previous one, overwrite later for voiced */

     lag = NSQ->lagPrev;

@@ -141,6 +145,7 @@

     silk_assert( NSQ->prev_gain_Q16 != 0 );

     /* Initialize delayed decision states */

+    ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct );

     silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) );

     for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) {

         psDD                 = &psDelDec[ k ];

@@ -175,6 +180,11 @@

         LSF_interpolation_flag = 1;

+    ALLOC( sLTP_Q15,

+           psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );

+    ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );

+    ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );

+    ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 );

     /* Set up pointers to start of sub frame */

     pxq                   = &NSQ->xq[ psEncC->ltp_mem_length ];

     NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;

@@ -287,6 +297,7 @@

     /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */

     silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );

     silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );

+    RESTORE_STACK;

 /******************************************/

@@ -328,11 +339,13 @@

     opus_int32   q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;

     opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;

     opus_int32   *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;

-    NSQ_sample_struct  psSampleState[ MAX_DEL_DEC_STATES ][ 2 ];

+    VARDECL( NSQ_sample_pair, psSampleState );

     NSQ_del_dec_struct *psDD;

     NSQ_sample_struct  *psSS;

+    SAVE_STACK;

     silk_assert( nStatesDelayedDecision > 0 );

+    ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );

     shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];

     pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];

@@ -614,6 +627,7 @@

         psDD = &psDelDec[ k ];

         silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );

+    RESTORE_STACK;

 static inline void silk_nsq_del_dec_scale_states(

--- a/silk/VAD.c

+++ b/silk/VAD.c

@@ -30,6 +30,7 @@

 #endif

 #include "main.h"

+#include "stack_alloc.h"

 /* Silk VAD noise level estimation */

 static inline void silk_VAD_GetNoiseLevels(

@@ -82,15 +83,19 @@

     opus_int   SA_Q15, pSNR_dB_Q7, input_tilt;

-    opus_int   decimated_framelength, dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;

+    opus_int   decimated_framelength1, decimated_framelength2;

+    opus_int   decimated_framelength;

+    opus_int   dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;

     opus_int32 sumSquared, smooth_coef_Q16;

     opus_int16 HPstateTmp;

-    opus_int16 X[ VAD_N_BANDS ][ MAX_FRAME_LENGTH / 2 ];

+    VARDECL( opus_int16, X );

     opus_int32 Xnrg[ VAD_N_BANDS ];

     opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ];

     opus_int32 speech_nrg, x_tmp;

+    opus_int   X_offset[ VAD_N_BANDS ];

     opus_int   ret = 0;

     silk_VAD_state *psSilk_VAD = &psEncC->sVAD;

+    SAVE_STACK;

     /* Safety checks */

     silk_assert( VAD_N_BANDS == 4 );

@@ -101,26 +106,46 @@

     /***********************/

     /* Filter and Decimate */

     /***********************/

+    decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 );

+    decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 );

+    decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );

+    /* Decimate into 4 bands:

+       0       L      3L       L              3L                             5L

+               -      --       -              --                             --

+               8       8       2               4                              4

+       [0-1 kHz| temp. |1-2 kHz|    2-4 kHz    |            4-8 kHz           |

+       They're arranged to allow the minimal ( frame_length / 4 ) extra

+       scratch space during the downsampling process */

+    X_offset[ 0 ] = 0;

+    X_offset[ 1 ] = decimated_framelength + decimated_framelength2;

+    X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength;

+    X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2;

+    ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 );

     /* 0-8 kHz to 0-4 kHz and 4-8 kHz */

-    silk_ana_filt_bank_1( pIn,          &psSilk_VAD->AnaState[  0 ], &X[ 0 ][ 0 ], &X[ 3 ][ 0 ], psEncC->frame_length );

+    silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[  0 ],

+        X, &X[ X_offset[ 3 ] ], psEncC->frame_length );

     /* 0-4 kHz to 0-2 kHz and 2-4 kHz */

-    silk_ana_filt_bank_1( &X[ 0 ][ 0 ], &psSilk_VAD->AnaState1[ 0 ], &X[ 0 ][ 0 ], &X[ 2 ][ 0 ], silk_RSHIFT( psEncC->frame_length, 1 ) );

+    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ],

+        X, &X[ X_offset[ 2 ] ], decimated_framelength1 );

     /* 0-2 kHz to 0-1 kHz and 1-2 kHz */

-    silk_ana_filt_bank_1( &X[ 0 ][ 0 ], &psSilk_VAD->AnaState2[ 0 ], &X[ 0 ][ 0 ], &X[ 1 ][ 0 ], silk_RSHIFT( psEncC->frame_length, 2 ) );

+    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ],

+        X, &X[ X_offset[ 1 ] ], decimated_framelength2 );

     /*********************************************/

     /* HP filter on lowest band (differentiator) */

     /*********************************************/

-    decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );

-    X[ 0 ][ decimated_framelength - 1 ] = silk_RSHIFT( X[ 0 ][ decimated_framelength - 1 ], 1 );

-    HPstateTmp = X[ 0 ][ decimated_framelength - 1 ];

+    X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 );

+    HPstateTmp = X[ decimated_framelength - 1 ];

     for( i = decimated_framelength - 1; i > 0; i-- ) {

-        X[ 0 ][ i - 1 ]  = silk_RSHIFT( X[ 0 ][ i - 1 ], 1 );

-        X[ 0 ][ i ]     -= X[ 0 ][ i - 1 ];

+        X[ i - 1 ]  = silk_RSHIFT( X[ i - 1 ], 1 );

+        X[ i ]     -= X[ i - 1 ];

-    X[ 0 ][ 0 ] -= psSilk_VAD->HPstate;

+    X[ 0 ] -= psSilk_VAD->HPstate;

     psSilk_VAD->HPstate = HPstateTmp;

     /*************************************/

@@ -142,7 +167,8 @@

             for( i = 0; i < dec_subframe_length; i++ ) {

                 /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2.            */

                 /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128)  */

-                x_tmp = silk_RSHIFT( X[ b ][ i + dec_subframe_offset ], 3 );

+                x_tmp = silk_RSHIFT(

+                    X[ X_offset[ b ] + i + dec_subframe_offset ], 3 );

                 sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp );

                 /* Safety check */

@@ -264,6 +290,7 @@

     return( ret );

+    RESTORE_STACK;

 /**************************/

--- a/silk/control_codec.c

+++ b/silk/control_codec.c

@@ -35,6 +35,7 @@

 #include "main_FLP.h"

 #define silk_encoder_state_Fxx      silk_encoder_state_FLP

 #endif

+#include "stack_alloc.h"

 #include "tuning_parameters.h"

 #include "pitch_est_defines.h"

@@ -138,6 +139,7 @@

     opus_int   ret = SILK_NO_ERROR;

     opus_int32 nSamples_temp;

+    SAVE_STACK;

     if( psEnc->sCmn.fs_kHz != fs_kHz || psEnc->sCmn.prev_API_fs_Hz != psEnc->sCmn.API_fs_Hz )

@@ -145,35 +147,38 @@

             /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */

             ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, fs_kHz * 1000, 1 );

         } else {

-            /* Allocate worst case space for temporary upsampling, 8 to 48 kHz, so a factor 6 */

-            opus_int16 x_buf_API_fs_Hz[ ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * MAX_API_FS_KHZ ];

-            silk_resampler_state_struct  temp_resampler_state;

+            VARDECL( opus_int16, x_buf_API_fs_Hz );

+            VARDECL( silk_resampler_state_struct, temp_resampler_state );

 #ifdef FIXED_POINT

             opus_int16 *x_bufFIX = psEnc->x_buf;

 #else

-            opus_int16 x_bufFIX[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];

+            VARDECL( opus_int16, x_bufFIX );

 #endif

+            opus_int32 nAPI_Samples_temp;

             nSamples_temp = silk_LSHIFT( psEnc->sCmn.frame_length, 1 ) + LA_SHAPE_MS * psEnc->sCmn.fs_kHz;

 #ifndef FIXED_POINT

+            ALLOC( x_bufFIX, nSamples_temp, opus_int16 );

             silk_float2short_array( x_bufFIX, psEnc->x_buf, nSamples_temp );

 #endif

             /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */

-            ret += silk_resampler_init( &temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 );

+            ALLOC( temp_resampler_state, 1, silk_resampler_state_struct );

+            ret += silk_resampler_init( temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 );

+            /* Calculate number of samples to temporarily upsample */

+            nAPI_Samples_temp = silk_DIV32_16( nSamples_temp * psEnc->sCmn.API_fs_Hz, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ) );

             /* Temporary resampling of x_buf data to API_fs_Hz */

-            ret += silk_resampler( &temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, nSamples_temp );

+            ALLOC( x_buf_API_fs_Hz, nAPI_Samples_temp, opus_int16 );

+            ret += silk_resampler( temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, nSamples_temp );

-            /* Calculate number of samples that has been temporarily upsampled */

-            nSamples_temp = silk_DIV32_16( nSamples_temp * psEnc->sCmn.API_fs_Hz, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ) );

             /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */

             ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ), 1 );

             /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */

-            ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, nSamples_temp );

+            ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, nAPI_Samples_temp );

 #ifndef FIXED_POINT

             silk_short2float_array( psEnc->x_buf, x_bufFIX, ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * fs_kHz );

@@ -183,6 +188,7 @@

     psEnc->sCmn.prev_API_fs_Hz = psEnc->sCmn.API_fs_Hz;

+    RESTORE_STACK;

     return ret;

--- a/silk/enc_API.c

+++ b/silk/enc_API.c

@@ -32,6 +32,7 @@

 #include "API.h"

 #include "control.h"

 #include "typedef.h"

+#include "stack_alloc.h"

 #include "structs.h"

 #include "tuning_parameters.h"

 #ifdef FIXED_POINT

@@ -146,12 +147,14 @@

     opus_int   n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;

-    opus_int   nSamplesToBuffer, nBlocksOf10ms, nSamplesFromInput = 0;

+    opus_int   nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms;

+    opus_int   nSamplesFromInput = 0, nSamplesFromInputMax;

     opus_int   speech_act_thr_for_switch_Q8;

     opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum;

     silk_encoder *psEnc = ( silk_encoder * )encState;

-    opus_int16 buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ ];

+    VARDECL( opus_int16, buf );

     opus_int transition, curr_block, tot_blocks;

+    SAVE_STACK;

     psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;

@@ -158,6 +161,7 @@

     /* Check values in encoder control structure */

     if( ( ret = check_control_input( encControl ) != 0 ) ) {

         silk_assert( 0 );

+        RESTORE_STACK;

         return ret;

@@ -192,6 +196,7 @@

         /* Only accept input length of 10 ms */

         if( nBlocksOf10ms != 1 ) {

             silk_assert( 0 );

+            RESTORE_STACK;

             return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;

         /* Reset Encoder */

@@ -212,11 +217,13 @@

         /* Only accept input lengths that are a multiple of 10 ms */

         if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) {

             silk_assert( 0 );

+            RESTORE_STACK;

             return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;

         /* Make sure no more than one packet can be produced */

         if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) {

             silk_assert( 0 );

+            RESTORE_STACK;

             return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;

@@ -227,6 +234,7 @@

         opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0;

         if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, TargetRate_bps, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) {

             silk_assert( 0 );

+            RESTORE_STACK;

             return ret;

         if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) {

@@ -239,9 +247,16 @@

     silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );

     /* Input buffering/resampling and encoding */

+    nSamplesToBufferMax =

+        10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz;

+    nSamplesFromInputMax =

+        silk_DIV32_16( nSamplesToBufferMax *

+                           psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz,

+                       psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );

+    ALLOC( buf, nSamplesFromInputMax, opus_int16 );

     while( 1 ) {

         nSamplesToBuffer  = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx;

-        nSamplesToBuffer  = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz );

+        nSamplesToBuffer  = silk_min( nSamplesToBuffer, nSamplesToBufferMax );

         nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );

         /* Resample and write to buffer */

         if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {

@@ -530,6 +545,7 @@

+    RESTORE_STACK;

     return ret;

--- a/silk/encode_pulses.c

+++ b/silk/encode_pulses.c

@@ -30,6 +30,7 @@

 #endif

 #include "main.h"

+#include "stack_alloc.h"

 /*********************************************/

 /* Encode quantization indices of excitation */

@@ -66,14 +67,15 @@

     opus_int   i, k, j, iter, bit, nLS, scale_down, RateLevelIndex = 0;

     opus_int32 abs_q, minSumBits_Q5, sumBits_Q5;

-    opus_int   abs_pulses[ MAX_FRAME_LENGTH ];

-    opus_int   sum_pulses[ MAX_NB_SHELL_BLOCKS ];

-    opus_int   nRshifts[   MAX_NB_SHELL_BLOCKS ];

+    VARDECL( opus_int, abs_pulses );

+    VARDECL( opus_int, sum_pulses );

+    VARDECL( opus_int, nRshifts );

     opus_int   pulses_comb[ 8 ];

     opus_int   *abs_pulses_ptr;

     const opus_int8 *pulses_ptr;

     const opus_uint8 *cdf_ptr;

     const opus_uint8 *nBits_ptr;

+    SAVE_STACK;

     silk_memset( pulses_comb, 0, 8 * sizeof( opus_int ) ); /* Fixing Valgrind reported problem*/

@@ -90,6 +92,8 @@

     /* Take the absolute value of the pulses */

+    ALLOC( abs_pulses, iter * SHELL_CODEC_FRAME_LENGTH, opus_int );

+    silk_assert( !( SHELL_CODEC_FRAME_LENGTH & 3 ) );

     for( i = 0; i < iter * SHELL_CODEC_FRAME_LENGTH; i+=4 ) {

         abs_pulses[i+0] = ( opus_int )silk_abs( pulses[ i + 0 ] );

         abs_pulses[i+1] = ( opus_int )silk_abs( pulses[ i + 1 ] );

@@ -98,6 +102,8 @@

     /* Calc sum pulses per shell code frame */

+    ALLOC( sum_pulses, iter, opus_int );

+    ALLOC( nRshifts, iter, opus_int );

     abs_pulses_ptr = abs_pulses;

     for( i = 0; i < iter; i++ ) {

         nRshifts[ i ] = 0;

@@ -196,4 +202,5 @@

     /* Encode signs */

     /****************/

     silk_encode_signs( psRangeEnc, pulses, frame_length, signalType, quantOffsetType, sum_pulses );

+    RESTORE_STACK;

--- a/silk/fixed/encode_frame_FIX.c

+++ b/silk/fixed/encode_frame_FIX.c

@@ -30,6 +30,7 @@

 #endif

 #include "main_FIX.h"

+#include "stack_alloc.h"

 #include "tuning_parameters.h"

 /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate           */

@@ -84,9 +85,7 @@

     silk_encoder_control_FIX sEncCtrl;

     opus_int     i, iter, maxIter, found_upper, found_lower, ret = 0;

-    opus_int16   *x_frame, *res_pitch_frame;

-    opus_int32   xfw_Q3[ MAX_FRAME_LENGTH ];

-    opus_int16   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];

+    opus_int16   *x_frame;

     ec_enc       sRangeEnc_copy, sRangeEnc_copy2;

     silk_nsq_state sNSQ_copy, sNSQ_copy2;

     opus_int32   seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;

@@ -95,7 +94,7 @@

     opus_int16   ec_prevLagIndex_copy;

     opus_int     ec_prevSignalType_copy;

     opus_int8    LastGainIndex_copy2;

-    opus_uint8   ec_buf_copy[ 1275 ];

+    SAVE_STACK;

     /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */

     LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;

@@ -105,9 +104,8 @@

     /**************************************************************/

     /* Set up Input Pointers, and insert frame in input buffer   */

     /*************************************************************/

-    /* pointers aligned with start of frame to encode */

-    x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */

-    res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */

+    /* start of frame to encode */

+    x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;

     /***************************************/

     /* Ensure smooth bandwidth transitions */

@@ -120,6 +118,17 @@

     silk_memcpy( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length * sizeof( opus_int16 ) );

     if( !psEnc->sCmn.prefillFlag ) {

+        VARDECL( opus_int32, xfw_Q3 );

+        VARDECL( opus_int16, res_pitch );

+        VARDECL( opus_uint8, ec_buf_copy );

+        opus_int16 *res_pitch_frame;

+        ALLOC( res_pitch,

+               psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length

+                   + psEnc->sCmn.ltp_mem_length, opus_int16 );

+        /* start of pitch LPC residual frame */

+        res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length;

         /*****************************************/

         /* Find pitch lags, initial LPC analysis */

         /*****************************************/

@@ -143,6 +152,7 @@

         /*****************************************/

         /* Prefiltering for noise shaper         */

         /*****************************************/

+        ALLOC( xfw_Q3, psEnc->sCmn.frame_length, opus_int32 );

         silk_prefilter_FIX( psEnc, &sEncCtrl, xfw_Q3, x_frame );

         /****************************************/

@@ -164,6 +174,7 @@

         seed_copy = psEnc->sCmn.indices.Seed;

         ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;

         ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;

+        ALLOC( ec_buf_copy, 1275, opus_uint8 );

         for( iter = 0; ; iter++ ) {

             if( gainsID == gainsID_lower ) {

                 nBits = nBits_lower;

@@ -299,6 +310,7 @@

     if( psEnc->sCmn.prefillFlag ) {

         /* No payload */

         *pnBytesOut = 0;

+        RESTORE_STACK;

         return ret;

@@ -309,6 +321,7 @@

     /* Payload size */

     *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );

+    RESTORE_STACK;

     return ret;

--- a/silk/fixed/find_LPC_FIX.c

+++ b/silk/fixed/find_LPC_FIX.c

@@ -30,6 +30,7 @@

 #endif

 #include "main_FIX.h"

+#include "stack_alloc.h"

 #include "tuning_parameters.h"

 /* Finds LPC vector from correlations, and converts to NLSF */

@@ -51,7 +52,7 @@

     opus_int     res_nrg_interp_Q, res_nrg_Q, res_tmp_nrg_Q;

     opus_int16   a_tmp_Q12[ MAX_LPC_ORDER ];

     opus_int16   NLSF0_Q15[ MAX_LPC_ORDER ];

-    opus_int16   LPC_res[ MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ];

+    SAVE_STACK;

     subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder;

@@ -62,6 +63,8 @@

     silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder );

     if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) {

+        VARDECL( opus_int16, LPC_res );

         /* Optimal solution for last 10 ms */

         silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder );

@@ -81,6 +84,8 @@

         /* Convert to NLSFs */

         silk_A2NLSF( NLSF_Q15, a_tmp_Q16, psEncC->predictLPCOrder );

+        ALLOC( LPC_res, 2 * subfr_length, opus_int16 );

         /* Search over interpolation indices to find the one with lowest residual energy */

         for( k = 3; k >= 0; k-- ) {

             /* Interpolate NLSFs for first half */

@@ -142,4 +147,5 @@

     silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) );

+    RESTORE_STACK;

--- a/silk/fixed/find_pitch_lags_FIX.c

+++ b/silk/fixed/find_pitch_lags_FIX.c

@@ -30,6 +30,7 @@

 #endif

 #include "main_FIX.h"

+#include "stack_alloc.h"

 #include "tuning_parameters.h"

 /* Find pitch lags */

@@ -43,11 +44,13 @@

     opus_int   buf_len, i, scale;

     opus_int32 thrhld_Q13, res_nrg;

     const opus_int16 *x_buf, *x_buf_ptr;

-    opus_int16 Wsig[      FIND_PITCH_LPC_WIN_MAX ], *Wsig_ptr;

+    VARDECL( opus_int16, Wsig );

+    opus_int16 *Wsig_ptr;

     opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ];

     opus_int16 rc_Q15[    MAX_FIND_PITCH_LPC_ORDER ];

     opus_int32 A_Q24[     MAX_FIND_PITCH_LPC_ORDER ];

     opus_int16 A_Q12[     MAX_FIND_PITCH_LPC_ORDER ];

+    SAVE_STACK;

     /******************************************/

     /* Set up buffer lengths etc based on Fs  */

@@ -65,6 +68,8 @@

     /* Calculate windowed signal */

+    ALLOC( Wsig, psEnc->sCmn.pitch_LPC_win_length, opus_int16 );

     /* First LA_LTP samples */

     x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length;

     Wsig_ptr  = Wsig;

@@ -134,4 +139,5 @@

         psEnc->sCmn.indices.contourIndex = 0;

         psEnc->LTPCorr_Q15 = 0;

+    RESTORE_STACK;

--- a/silk/fixed/find_pred_coefs_FIX.c

+++ b/silk/fixed/find_pred_coefs_FIX.c

@@ -30,6 +30,7 @@

 #endif

 #include "main_FIX.h"

+#include "stack_alloc.h"

 void silk_find_pred_coefs_FIX(

     silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */

@@ -40,13 +41,14 @@

     opus_int         i;

-    opus_int32       WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ];

     opus_int32       invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ], Wght_Q15[ MAX_NB_SUBFR ];

     opus_int16       NLSF_Q15[ MAX_LPC_ORDER ];

     const opus_int16 *x_ptr;

-    opus_int16       *x_pre_ptr, LPC_in_pre[ MAX_NB_SUBFR * MAX_LPC_ORDER + MAX_FRAME_LENGTH ];

+    opus_int16       *x_pre_ptr;

+    VARDECL( opus_int16, LPC_in_pre );

     opus_int32       tmp, min_gain_Q16, minInvGain_Q30;

     opus_int         LTP_corrs_rshift[ MAX_NB_SUBFR ];

+    SAVE_STACK;

     /* weighting for weighted least squares */

     min_gain_Q16 = silk_int32_MAX >> 6;

@@ -71,12 +73,19 @@

         local_gains[ i ] = silk_DIV32( ( (opus_int32)1 << 16 ), invGains_Q16[ i ] );

+    ALLOC( LPC_in_pre,

+           psEnc->sCmn.nb_subfr * psEnc->sCmn.predictLPCOrder

+               + psEnc->sCmn.frame_length, opus_int16 );

     if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {

+        VARDECL( opus_int32, WLTP );

         /**********/

         /* VOICED */

         /**********/

         silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );

+        ALLOC( WLTP, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 );

         /* LTP analysis */

         silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7,

             res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length,

@@ -133,4 +142,5 @@

     /* Copy to prediction struct for use in next frame for interpolation */

     silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );

+    RESTORE_STACK;

--- a/silk/fixed/noise_shape_analysis_FIX.c

+++ b/silk/fixed/noise_shape_analysis_FIX.c

@@ -30,6 +30,7 @@

 #endif

 #include "main_FIX.h"

+#include "stack_alloc.h"

 #include "tuning_parameters.h"

 /* Compute gain to make warped filter coefficients have a zero mean log frequency response on a   */

@@ -156,8 +157,9 @@

     opus_int32   refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ];

     opus_int32   AR1_Q24[       MAX_SHAPE_LPC_ORDER ];

     opus_int32   AR2_Q24[       MAX_SHAPE_LPC_ORDER ];

-    opus_int16   x_windowed[    SHAPE_LPC_WIN_MAX ];

+    VARDECL( opus_int16, x_windowed );

     const opus_int16 *x_ptr, *pitch_res_ptr;

+    SAVE_STACK;

     /* Point to start of first LPC analysis block */

     x_ptr = x - psEnc->sCmn.la_shape;

@@ -258,6 +260,7 @@

     /********************************************/

     /* Compute noise shaping AR coefs and gains */

     /********************************************/

+    ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 );

     for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {

         /* Apply window: sine slope followed by flat part followed by cosine slope */

         opus_int shift, slope_part, flat_part;

@@ -437,4 +440,5 @@

         psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 );

         psEncCtrl->Tilt_Q14[ k ]          = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16,          2 );

+    RESTORE_STACK;

--- a/silk/fixed/pitch_analysis_core_FIX.c

+++ b/silk/fixed/pitch_analysis_core_FIX.c

@@ -34,15 +34,29 @@

 ********************************************************** */

 #include "SigProc_FIX.h"

 #include "pitch_est_defines.h"

+#include "stack_alloc.h"

 #include "debug.h"

 #define SCRATCH_SIZE    22

+#define SF_LENGTH_4KHZ  ( PE_SUBFR_LENGTH_MS * 4 )

+#define SF_LENGTH_8KHZ  ( PE_SUBFR_LENGTH_MS * 8 )

+#define MIN_LAG_4KHZ    ( PE_MIN_LAG_MS * 4 )

+#define MIN_LAG_8KHZ    ( PE_MIN_LAG_MS * 8 )

+#define MAX_LAG_4KHZ    ( PE_MAX_LAG_MS * 4 )

+#define MAX_LAG_8KHZ    ( PE_MAX_LAG_MS * 8 - 1 )

+#define CSTRIDE_4KHZ    ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ )

+#define CSTRIDE_8KHZ    ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) )

+#define D_COMP_MIN      ( MIN_LAG_8KHZ - 3 )

+#define D_COMP_MAX      ( MAX_LAG_8KHZ + 4 )

+#define D_COMP_STRIDE   ( D_COMP_MAX - D_COMP_MIN )

+typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ];

 /************************************************************/

 /* Internally used functions                                */

 /************************************************************/

 static void silk_P_Ana_calc_corr_st3(

-    opus_int32        cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* O 3 DIM correlation array */

+    silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */

     const opus_int16  frame[],                         /* I vector to correlate         */

     opus_int          start_lag,                       /* I lag offset to search around */

     opus_int          sf_length,                       /* I length of a 5 ms subframe   */

@@ -51,7 +65,7 @@

);

 static void silk_P_Ana_calc_energy_st3(

-    opus_int32        energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* O 3 DIM energy array */

+    silk_pe_stage3_vals energies_st3[],                /* O 3 DIM energy array */

     const opus_int16  frame[],                         /* I vector to calc energy in    */

     opus_int          start_lag,                       /* I lag offset to search around */

     opus_int          sf_length,                       /* I length of one 5 ms subframe */

@@ -76,30 +90,30 @@

     const opus_int              nb_subfr            /* I    number of 5 ms subframes                                    */

-    opus_int16 frame_8kHz[ PE_MAX_FRAME_LENGTH_ST_2 ];

-    opus_int16 frame_4kHz[ PE_MAX_FRAME_LENGTH_ST_1 ];

+    VARDECL( opus_int16, frame_8kHz );

+    VARDECL( opus_int16, frame_4kHz );

     opus_int32 filt_state[ 6 ];

-    opus_int32 scratch_mem[ 3 * PE_MAX_FRAME_LENGTH ];

-    opus_int16 *input_frame_ptr;

+    const opus_int16 *input_frame_ptr;

     opus_int   i, k, d, j;

-    opus_int16 C[ PE_MAX_NB_SUBFR ][ ( PE_MAX_LAG >> 1 ) + 5 ];

+    VARDECL( opus_int16, C );

     const opus_int16 *target_ptr, *basis_ptr;

     opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;

     opus_int   d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;

-    opus_int16 d_comp[ ( PE_MAX_LAG >> 1 ) + 5 ];

+    VARDECL( opus_int16, d_comp );

     opus_int32 sum, threshold, lag_counter;

     opus_int   CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;

     opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;

-    opus_int32 energies_st3[  PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];

-    opus_int32 crosscorr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];

+    VARDECL( silk_pe_stage3_vals, energies_st3 );

+    VARDECL( silk_pe_stage3_vals, cross_corr_st3 );

     opus_int   frame_length, frame_length_8kHz, frame_length_4kHz;

-    opus_int   sf_length, sf_length_8kHz, sf_length_4kHz;

-    opus_int   min_lag, min_lag_8kHz, min_lag_4kHz;

-    opus_int   max_lag, max_lag_8kHz, max_lag_4kHz;

+    opus_int   sf_length;

+    opus_int   min_lag;

+    opus_int   max_lag;

     opus_int32 contour_bias_Q15, diff;

     opus_int   nb_cbk_search, cbk_size;

     opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;

     const opus_int8 *Lag_CB_ptr;

+    SAVE_STACK;

     /* Check for valid sampling frequency */

     silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );

@@ -115,16 +129,11 @@

     frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;

     frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;

     sf_length         = PE_SUBFR_LENGTH_MS * Fs_kHz;

-    sf_length_4kHz    = PE_SUBFR_LENGTH_MS * 4;

-    sf_length_8kHz    = PE_SUBFR_LENGTH_MS * 8;

     min_lag           = PE_MIN_LAG_MS * Fs_kHz;

-    min_lag_4kHz      = PE_MIN_LAG_MS * 4;

-    min_lag_8kHz      = PE_MIN_LAG_MS * 8;

     max_lag           = PE_MAX_LAG_MS * Fs_kHz - 1;

-    max_lag_4kHz      = PE_MAX_LAG_MS * 4;

-    max_lag_8kHz      = PE_MAX_LAG_MS * 8 - 1;

     /* Resample from input sampled at Fs_kHz to 8 kHz */

+    ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 );

     if( Fs_kHz == 16 ) {

         silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );

         silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length );

@@ -138,6 +147,7 @@

     /* Decimate again to 4 kHz */

     silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */

+    ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 );

     silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );

     /* Low-pass filter */

@@ -162,61 +172,65 @@

     /******************************************************************************

     * FIRST STAGE, operating in 4 khz

     ******************************************************************************/

-    silk_memset( C, 0, sizeof( opus_int16 ) * nb_subfr * ( ( PE_MAX_LAG >> 1 ) + 5) );

-    target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ];

+    ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );

+    silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );

+    target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];

     for( k = 0; k < nb_subfr >> 1; k++ ) {

         /* Check that we are within range of the array */

         silk_assert( target_ptr >= frame_4kHz );

-        silk_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );

+        silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );

-        basis_ptr = target_ptr - min_lag_4kHz;

+        basis_ptr = target_ptr - MIN_LAG_4KHZ;

         /* Check that we are within range of the array */

         silk_assert( basis_ptr >= frame_4kHz );

-        silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );

+        silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );

         /* Calculate first vector products before loop */

-        cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr,  sf_length_8kHz );

-        normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, sf_length_8kHz );

-        normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, sf_length_8kHz ) );

-        normalizer = silk_ADD32( normalizer, silk_SMULBB( sf_length_8kHz, 4000 ) );

+        cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr,  SF_LENGTH_8KHZ );

+        normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ );

+        normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, SF_LENGTH_8KHZ ) );

+        normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );

-        C[ k ][ min_lag_4kHz ] = (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */

+        matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =

+            (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                      /* Q13 */

         /* From now on normalizer is computed recursively */

-        for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) {

+        for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) {

             basis_ptr--;

             /* Check that we are within range of the array */

             silk_assert( basis_ptr >= frame_4kHz );

-            silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );

+            silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );

-            cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );

+            cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );

             /* Add contribution of new sample and remove contribution from oldest sample */

             normalizer = silk_ADD32( normalizer,

                 silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -

-                silk_SMULBB( basis_ptr[ sf_length_8kHz ], basis_ptr[ sf_length_8kHz ] ) );

+                silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) );

-            C[ k ][ d ] = (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );        /* Q13 */

+            matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) =

+                (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                  /* Q13 */

         /* Update target pointer */

-        target_ptr += sf_length_8kHz;

+        target_ptr += SF_LENGTH_8KHZ;

     /* Combine two subframes into single correlation measure and apply short-lag bias */

     if( nb_subfr == PE_MAX_NB_SUBFR ) {

-        for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) {

-            sum = (opus_int32)C[ 0 ][ i ] + (opus_int32) C[ 1 ][ i ];                           /* Q14 */

+        for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {

+            sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ )

+                + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ );               /* Q14 */

             sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */

-            C[ 0 ][ i ] = (opus_int16)sum;                                                      /* Q14 */

+            C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */

     } else {

         /* Only short-lag bias */

-        for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) {

-            sum = silk_LSHIFT( (opus_int32)C[ 0 ][ i ], 1 );                                    /* Q14 */

+        for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {

+            sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 );                          /* Q14 */

             sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */

-            C[ 0 ][ i ] = (opus_int16)sum;                                                      /* Q14 */

+            C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */

@@ -223,15 +237,17 @@

     /* Sort */

     length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );

     silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );

-    silk_insertion_sort_decreasing_int16( &C[ 0 ][ min_lag_4kHz ], d_srch, max_lag_4kHz - min_lag_4kHz + 1, length_d_srch );

+    silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ,

+                                          length_d_srch );

     /* Escape if correlation is very low already here */

-    Cmax = (opus_int)C[ 0 ][ min_lag_4kHz ];                                                    /* Q14 */

+    Cmax = (opus_int)C[ 0 ];                                                    /* Q14 */

     if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {

         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );

         *LTPCorr_Q15  = 0;

         *lagIndex     = 0;

         *contourIndex = 0;

+        RESTORE_STACK;

         return 1;

@@ -238,8 +254,8 @@

     threshold = silk_SMULWB( search_thres1_Q16, Cmax );

     for( i = 0; i < length_d_srch; i++ ) {

         /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */

-        if( C[ 0 ][ min_lag_4kHz + i ] > threshold ) {

-            d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + min_lag_4kHz, 1 );

+        if( C[ i ] > threshold ) {

+            d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 );

         } else {

             length_d_srch = i;

             break;

@@ -247,21 +263,23 @@

     silk_assert( length_d_srch > 0 );

-    for( i = min_lag_8kHz - 5; i < max_lag_8kHz + 5; i++ ) {

-        d_comp[ i ] = 0;

+    ALLOC( d_comp, D_COMP_STRIDE, opus_int16 );

+    for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) {

+        d_comp[ i - D_COMP_MIN ] = 0;

     for( i = 0; i < length_d_srch; i++ ) {

-        d_comp[ d_srch[ i ] ] = 1;

+        d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1;

     /* Convolution */

-    for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) {

-        d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ];

+    for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {

+        d_comp[ i - D_COMP_MIN ] +=

+            d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ];

     length_d_srch = 0;

-    for( i = min_lag_8kHz; i < max_lag_8kHz + 1; i++ ) {

-        if( d_comp[ i + 1 ] > 0 ) {

+    for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) {

+        if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) {

             d_srch[ length_d_srch ] = i;

             length_d_srch++;

@@ -268,13 +286,14 @@

     /* Convolution */

-    for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) {

-        d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ] + d_comp[ i - 3 ];

+    for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {

+        d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ]

+            + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ];

     length_d_comp = 0;

-    for( i = min_lag_8kHz; i < max_lag_8kHz + 4; i++ ) {

-        if( d_comp[ i ] > 0 ) {

+    for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) {

+        if( d_comp[ i - D_COMP_MIN ] > 0 ) {

             d_comp[ length_d_comp ] = i - 2;

             length_d_comp++;

@@ -299,7 +318,7 @@

     /*********************************************************************************

     * Find energy of each subframe projected onto its history, for a range of delays

     *********************************************************************************/

-    silk_memset( C, 0, PE_MAX_NB_SUBFR * ( ( PE_MAX_LAG >> 1 ) + 5 ) * sizeof( opus_int16 ) );

+    silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) );

     target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];

     for( k = 0; k < nb_subfr; k++ ) {

@@ -306,9 +325,9 @@

         /* Check that we are within range of the array */

         silk_assert( target_ptr >= frame_8kHz );

-        silk_assert( target_ptr + sf_length_8kHz <= frame_8kHz + frame_length_8kHz );

+        silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );

-        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, sf_length_8kHz ), 1 );

+        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ), 1 );

         for( j = 0; j < length_d_comp; j++ ) {

             d = d_comp[ j ];

             basis_ptr = target_ptr - d;

@@ -315,17 +334,21 @@

             /* Check that we are within range of the array */

             silk_assert( basis_ptr >= frame_8kHz );

-            silk_assert( basis_ptr + sf_length_8kHz <= frame_8kHz + frame_length_8kHz );

+            silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );

-            cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );

+            cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );

             if( cross_corr > 0 ) {

-                energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length_8kHz );

-                C[ k ][ d ] = (opus_int16)silk_DIV32_varQ( cross_corr, silk_ADD32( energy_target, energy_basis ), 13 + 1 );        /* Q13 */

+                energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ );

+                matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =

+                    (opus_int16)silk_DIV32_varQ( cross_corr,

+                                                 silk_ADD32( energy_target,

+                                                             energy_basis ),

+                                                 13 + 1 );                                      /* Q13 */

             } else {

-                C[ k ][ d ] = 0;

+                matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0;

-        target_ptr += sf_length_8kHz;

+        target_ptr += SF_LENGTH_8KHZ;

     /* search over lag range and lags codebook */

@@ -369,8 +392,13 @@

         for( j = 0; j < nb_cbk_search; j++ ) {

             CC[ j ] = 0;

             for( i = 0; i < nb_subfr; i++ ) {

+                opus_int d_subfr;

                 /* Try all codebooks */

-                CC[ j ] = CC[ j ] + (opus_int32)C[ i ][ d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size ) ];

+                d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size );

+                CC[ j ] = CC[ j ]

+                    + (opus_int32)matrix_ptr( C, i,

+                                              d_subfr - ( MIN_LAG_8KHZ - 2 ),

+                                              CSTRIDE_8KHZ );

         /* Find best codebook */

@@ -402,7 +430,7 @@

         if( CCmax_new_b > CCmax_b                                   &&  /* Find maximum biased correlation                  */

             CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 )  &&  /* Correlation needs to be high enough to be voiced */

-            silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= min_lag_8kHz      /* Lag must be in range                             */

+            silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ      /* Lag must be in range                             */

) {

             CCmax_b = CCmax_new_b;

             CCmax   = CCmax_new;

@@ -417,6 +445,7 @@

         *LTPCorr_Q15  = 0;

         *lagIndex     = 0;

         *contourIndex = 0;

+        RESTORE_STACK;

         return 1;

@@ -425,21 +454,22 @@

     silk_assert( *LTPCorr_Q15 >= 0 );

     if( Fs_kHz > 8 ) {

+        VARDECL( opus_int16, scratch_mem );

         /***************************************************************************/

         /* Scale input signal down to avoid correlations measures from overflowing */

         /***************************************************************************/

         /* find scaling as max scaling for each subframe */

         silk_sum_sqr_shift( &energy, &shift, frame, frame_length );

+        ALLOC( scratch_mem, shift > 0 ? frame_length : 0, opus_int16 );

         if( shift > 0 ) {

             /* Move signal to scratch mem because the input signal should be unchanged */

-            /* Reuse the 32 bit scratch mem vector, use a 16 bit pointer from now */

             shift = silk_RSHIFT( shift, 1 );

-            input_frame_ptr = (opus_int16*)scratch_mem;

             for( i = 0; i < frame_length; i++ ) {

-                input_frame_ptr[ i ] = silk_RSHIFT( frame[ i ], shift );

+                scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift );

+            input_frame_ptr = scratch_mem;

         } else {

-            input_frame_ptr = (opus_int16*)frame;

+            input_frame_ptr = frame;

         /* Search in original signal */

@@ -466,14 +496,7 @@

         for( k = 0; k < nb_subfr; k++ ) {

             pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ];

-        /* Calculate the correlations and energies needed in stage 3 */

-        silk_P_Ana_calc_corr_st3(  crosscorr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );

-        silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );

-        lag_counter = 0;

-        silk_assert( lag == silk_SAT16( lag ) );

-        contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );

         /* Set up codebook parameters according to complexity setting and frame length */

         if( nb_subfr == PE_MAX_NB_SUBFR ) {

             nb_cbk_search   = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];

@@ -485,6 +508,16 @@

             Lag_CB_ptr      = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];

+        /* Calculate the correlations and energies needed in stage 3 */

+        ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );

+        ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );

+        silk_P_Ana_calc_corr_st3(  cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );

+        silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );

+        lag_counter = 0;

+        silk_assert( lag == silk_SAT16( lag ) );

+        contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );

         target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];

         energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 );

         for( d = start_lag; d <= end_lag; d++ ) {

@@ -492,8 +525,12 @@

                 cross_corr = 0;

                 energy     = energy_target;

                 for( k = 0; k < nb_subfr; k++ ) {

-                    cross_corr = silk_ADD32( cross_corr, crosscorr_st3[ k ][ j ][ lag_counter ] );

-                    energy     = silk_ADD32( energy, energies_st3[  k ][ j ][ lag_counter ] );

+                    cross_corr = silk_ADD32( cross_corr,

+                        matrix_ptr( cross_corr_st3, k, j,

+                                    nb_cbk_search )[ lag_counter ] );

+                    energy     = silk_ADD32( energy,

+                        matrix_ptr( energies_st3, k, j,

+                                    nb_cbk_search )[ lag_counter ] );

                     silk_assert( energy >= 0 );

                 if( cross_corr > 0 ) {

@@ -525,13 +562,14 @@

         /* Save Lags */

         for( k = 0; k < nb_subfr; k++ ) {

             pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );

-            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 );

+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 );

-        *lagIndex = (opus_int16)( lag - min_lag_8kHz );

+        *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ );

         *contourIndex = (opus_int8)CBimax;

     silk_assert( *lagIndex >= 0 );

     /* return as voiced */

+    RESTORE_STACK;

     return 0;

@@ -549,7 +587,7 @@

  * case 4*12*5 = 240 correlations, but more likely around 120.

  ***********************************************************************/

 static void silk_P_Ana_calc_corr_st3(

-    opus_int32        cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* O 3 DIM correlation array */

+    silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */

     const opus_int16  frame[],                         /* I vector to correlate         */

     opus_int          start_lag,                       /* I lag offset to search around */

     opus_int          sf_length,                       /* I length of a 5 ms subframe   */

@@ -561,8 +599,9 @@

     opus_int32 cross_corr;

     opus_int   i, j, k, lag_counter, lag_low, lag_high;

     opus_int   nb_cbk_search, delta, idx, cbk_size;

-    opus_int32 scratch_mem[ SCRATCH_SIZE ];

+    VARDECL( opus_int32, scratch_mem );

     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;

+    SAVE_STACK;

     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );

     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );

@@ -579,6 +618,7 @@

         nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;

         cbk_size      = PE_NB_CBKS_STAGE3_10MS;

+    ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );

     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */

     for( k = 0; k < nb_subfr; k++ ) {

@@ -603,11 +643,13 @@

             for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {

                 silk_assert( idx + j < SCRATCH_SIZE );

                 silk_assert( idx + j < lag_counter );

-                cross_corr_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ];

+                matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] =

+                    scratch_mem[ idx + j ];

         target_ptr += sf_length;

+    RESTORE_STACK;

 /********************************************************************/

@@ -615,7 +657,7 @@

 /* calculated recursively.                                          */

 /********************************************************************/

 static void silk_P_Ana_calc_energy_st3(

-    opus_int32        energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],    /* O 3 DIM energy array */

+    silk_pe_stage3_vals energies_st3[],                 /* O 3 DIM energy array */

     const opus_int16  frame[],                          /* I vector to calc energy in    */

     opus_int          start_lag,                        /* I lag offset to search around */

     opus_int          sf_length,                        /* I length of one 5 ms subframe */

@@ -627,8 +669,9 @@

     opus_int32 energy;

     opus_int   k, i, j, lag_counter;

     opus_int   nb_cbk_search, delta, idx, cbk_size, lag_diff;

-    opus_int32 scratch_mem[ SCRATCH_SIZE ];

+    VARDECL( opus_int32, scratch_mem );

     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;

+    SAVE_STACK;

     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );

     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );

@@ -645,6 +688,8 @@

         nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;

         cbk_size      = PE_NB_CBKS_STAGE3_10MS;

+    ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );

     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];

     for( k = 0; k < nb_subfr; k++ ) {

         lag_counter = 0;

@@ -678,10 +723,13 @@

             for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {

                 silk_assert( idx + j < SCRATCH_SIZE );

                 silk_assert( idx + j < lag_counter );

-                energies_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ];

-                silk_assert( energies_st3[ k ][ i ][ j ] >= 0 );

+                matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] =

+                    scratch_mem[ idx + j ];

+                silk_assert(

+                    matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 );

         target_ptr += sf_length;

+    RESTORE_STACK;

--- a/silk/fixed/prefilter_FIX.c

+++ b/silk/fixed/prefilter_FIX.c

@@ -30,6 +30,7 @@

 #endif

 #include "main_FIX.h"

+#include "stack_alloc.h"

 #include "tuning_parameters.h"

 /* Prefilter for finding Quantizer input signal */

@@ -101,14 +102,17 @@

     opus_int32 *pxw_Q3;

     opus_int   HarmShapeGain_Q12, Tilt_Q14;

     opus_int32 HarmShapeFIRPacked_Q12, LF_shp_Q14;

-    opus_int32 x_filt_Q12[ MAX_SUB_FRAME_LENGTH ];

-    opus_int32 st_res_Q2[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ];

+    VARDECL( opus_int32, x_filt_Q12 );

+    VARDECL( opus_int32, st_res_Q2 );

     opus_int16 B_Q10[ 2 ];

+    SAVE_STACK;

     /* Set up pointers */

     px  = x;

     pxw_Q3 = xw_Q3;

     lag = P->lagPrev;

+    ALLOC( x_filt_Q12, psEnc->sCmn.subfr_length, opus_int32 );

+    ALLOC( st_res_Q2, psEnc->sCmn.subfr_length, opus_int32 );

     for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {

         /* Update Variables that change per sub frame */

         if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {

@@ -148,6 +152,7 @@

     P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ];

+    RESTORE_STACK;

 /* Prefilter for finding Quantizer input signal */

--- a/silk/fixed/residual_energy_FIX.c

+++ b/silk/fixed/residual_energy_FIX.c

@@ -30,6 +30,7 @@

 #endif

 #include "main_FIX.h"

+#include "stack_alloc.h"

 /* Calculates residual energies of input subframes where all subframes have LPC_order   */

 /* of preceding samples                                                                 */

@@ -45,14 +46,18 @@

     opus_int         offset, i, j, rshift, lz1, lz2;

-    opus_int16       *LPC_res_ptr, LPC_res[ ( MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ) / 2 ];

+    opus_int16       *LPC_res_ptr;

+    VARDECL( opus_int16, LPC_res );

     const opus_int16 *x_ptr;

     opus_int32       tmp32;

+    SAVE_STACK;

     x_ptr  = x;

     offset = LPC_order + subfr_length;

     /* Filter input to create the LPC residual for each frame half, and measure subframe energies */

+    ALLOC( LPC_res, ( MAX_NB_SUBFR >> 1 ) * offset, opus_int16 );

+    silk_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr );

     for( i = 0; i < nb_subfr >> 1; i++ ) {

         /* Calculate half frame LPC residual signal including preceding samples */

         silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order );

@@ -88,4 +93,5 @@

         nrgs[ i ] = silk_SMMUL( tmp32, silk_LSHIFT32( nrgs[ i ], lz1 ) ); /* Q( nrgsQ[ i ] + lz1 + 2 * lz2 - 32 - 32 )*/

         nrgsQ[ i ] += lz1 + 2 * lz2 - 32 - 32;

+    RESTORE_STACK;

--- a/silk/fixed/solve_LS_FIX.c

+++ b/silk/fixed/solve_LS_FIX.c

@@ -30,6 +30,7 @@

 #endif

 #include "main_FIX.h"

+#include "stack_alloc.h"

 #include "tuning_parameters.h"

 /*****************************/

@@ -79,11 +80,13 @@

     opus_int32                      *x_Q16                                  /* O    Pointer to x solution vector                                                */

-    opus_int32 L_Q16[  MAX_MATRIX_SIZE * MAX_MATRIX_SIZE ];

+    VARDECL( opus_int32, L_Q16 );

     opus_int32 Y[      MAX_MATRIX_SIZE ];

     inv_D_t   inv_D[  MAX_MATRIX_SIZE ];

+    SAVE_STACK;

     silk_assert( M <= MAX_MATRIX_SIZE );

+    ALLOC( L_Q16, M * M, opus_int32 );

     /***************************************************

     Factorize A by LDL such that A = L*D*L',

@@ -107,6 +110,7 @@

     x = inv(L') * inv(D) * Y

     *****************************************************/

     silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 );

+    RESTORE_STACK;

 static inline void silk_LDL_factorize_FIX(

--- a/silk/macros.h

+++ b/silk/macros.h

@@ -123,12 +123,15 @@

 /* Row based */

-#define matrix_ptr(Matrix_base_adr, row, column, N)         *(Matrix_base_adr + ((row)*(N)+(column)))

-#define matrix_adr(Matrix_base_adr, row, column, N)          (Matrix_base_adr + ((row)*(N)+(column)))

+#define matrix_ptr(Matrix_base_adr, row, column, N) \

+    (*((Matrix_base_adr) + ((row)*(N)+(column))))

+#define matrix_adr(Matrix_base_adr, row, column, N) \

+      ((Matrix_base_adr) + ((row)*(N)+(column)))

 /* Column based */

 #ifndef matrix_c_ptr

-#   define matrix_c_ptr(Matrix_base_adr, row, column, M)    *(Matrix_base_adr + ((row)+(M)*(column)))

+#   define matrix_c_ptr(Matrix_base_adr, row, column, M) \

+    (*((Matrix_base_adr) + ((row)+(M)*(column))))

 #endif

 #endif /* SILK_MACROS_H */

--- a/silk/resampler_down2.c

+++ b/silk/resampler_down2.c

@@ -35,8 +35,8 @@

 /* Downsample by a factor 2 */

 void silk_resampler_down2(

     opus_int32                  *S,                 /* I/O  State vector [ 2 ]                                          */

-    opus_int16                  *out,               /* O    Output signal [ len ]                                       */

-    const opus_int16            *in,                /* I    Input signal [ floor(len/2) ]                               */

+    opus_int16                  *out,               /* O    Output signal [ floor(len/2) ]                              */

+    const opus_int16            *in,                /* I    Input signal [ len ]                                        */

     opus_int32                  inLen               /* I    Number of input samples                                     */

--- a/silk/resampler_down2_3.c

+++ b/silk/resampler_down2_3.c

@@ -31,6 +31,7 @@

 #include "SigProc_FIX.h"

 #include "resampler_private.h"

+#include "stack_alloc.h"

 #define ORDER_FIR                   4

@@ -43,9 +44,12 @@

     opus_int32 nSamplesIn, counter, res_Q6;

-    opus_int32 buf[ RESAMPLER_MAX_BATCH_SIZE_IN + ORDER_FIR ];

+    VARDECL( opus_int32, buf );

     opus_int32 *buf_ptr;

+    SAVE_STACK;

+    ALLOC( buf, RESAMPLER_MAX_BATCH_SIZE_IN + ORDER_FIR, opus_int32 );

     /* Copy buffered samples to start of buffer */

     silk_memcpy( buf, S, ORDER_FIR * sizeof( opus_int32 ) );

@@ -95,4 +99,5 @@

     /* Copy last part of filtered signal to the state for the next call */

     silk_memcpy( S, &buf[ nSamplesIn ], ORDER_FIR * sizeof( opus_int32 ) );

+    RESTORE_STACK;

--- a/silk/resampler_private_down_FIR.c

+++ b/silk/resampler_private_down_FIR.c

@@ -31,6 +31,7 @@

 #include "SigProc_FIX.h"

 #include "resampler_private.h"

+#include "stack_alloc.h"

 static inline opus_int16 *silk_resampler_private_down_FIR_INTERPOL(

     opus_int16          *out,

@@ -151,9 +152,12 @@

     silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS;

     opus_int32 nSamplesIn;

     opus_int32 max_index_Q16, index_increment_Q16;

-    opus_int32 buf[ RESAMPLER_MAX_BATCH_SIZE_IN + SILK_RESAMPLER_MAX_FIR_ORDER ];

+    VARDECL( opus_int32, buf );

     const opus_int16 *FIR_Coefs;

+    SAVE_STACK;

+    ALLOC( buf, S->batchSize + S->FIR_Order, opus_int32 );

     /* Copy buffered samples to start of buffer */

     silk_memcpy( buf, S->sFIR.i32, S->FIR_Order * sizeof( opus_int32 ) );

@@ -186,4 +190,5 @@

     /* Copy last part of filtered signal to the state for the next call */

     silk_memcpy( S->sFIR.i32, &buf[ nSamplesIn ], S->FIR_Order * sizeof( opus_int32 ) );

+    RESTORE_STACK;

--- a/silk/stereo_LR_to_MS.c

+++ b/silk/stereo_LR_to_MS.c

@@ -30,6 +30,7 @@

 #endif

 #include "main.h"

+#include "stack_alloc.h"

 /* Convert Left/Right stereo signal to adaptive Mid/Side representation */

 void silk_stereo_LR_to_MS(

@@ -49,11 +50,15 @@

     opus_int   n, is10msFrame, denom_Q16, delta0_Q13, delta1_Q13;

     opus_int32 sum, diff, smooth_coef_Q16, pred_Q13[ 2 ], pred0_Q13, pred1_Q13;

     opus_int32 LP_ratio_Q14, HP_ratio_Q14, frac_Q16, frac_3_Q16, min_mid_rate_bps, width_Q14, w_Q24, deltaw_Q24;

-    opus_int16 side[ MAX_FRAME_LENGTH + 2 ];

-    opus_int16 LP_mid[  MAX_FRAME_LENGTH ], HP_mid[  MAX_FRAME_LENGTH ];

-    opus_int16 LP_side[ MAX_FRAME_LENGTH ], HP_side[ MAX_FRAME_LENGTH ];

+    VARDECL( opus_int16, side );

+    VARDECL( opus_int16, LP_mid );

+    VARDECL( opus_int16, HP_mid );

+    VARDECL( opus_int16, LP_side );

+    VARDECL( opus_int16, HP_side );

     opus_int16 *mid = &x1[ -2 ];

+    SAVE_STACK;

+    ALLOC( side, frame_length + 2, opus_int16 );

     /* Convert to basic mid/side signals */

     for( n = 0; n < frame_length + 2; n++ ) {

         sum  = x1[ n - 2 ] + (opus_int32)x2[ n - 2 ];

@@ -69,6 +74,8 @@

     silk_memcpy( state->sSide, &side[ frame_length ], 2 * sizeof( opus_int16 ) );

     /* LP and HP filter mid signal */

+    ALLOC( LP_mid, frame_length, opus_int16 );

+    ALLOC( HP_mid, frame_length, opus_int16 );

     for( n = 0; n < frame_length; n++ ) {

         sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );

         LP_mid[ n ] = sum;

@@ -76,6 +83,8 @@

     /* LP and HP filter side signal */

+    ALLOC( LP_side, frame_length, opus_int16 );

+    ALLOC( HP_side, frame_length, opus_int16 );

     for( n = 0; n < frame_length; n++ ) {

         sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + side[ n + 2 ], side[ n + 1 ], 1 ), 2 );

         LP_side[ n ] = sum;

@@ -216,4 +225,5 @@

     state->pred_prev_Q13[ 0 ] = (opus_int16)pred_Q13[ 0 ];

     state->pred_prev_Q13[ 1 ] = (opus_int16)pred_Q13[ 1 ];

     state->width_prev_Q14     = (opus_int16)width_Q14;

+    RESTORE_STACK;

--- a/src/opus_encoder.c

+++ b/src/opus_encoder.c

@@ -1241,7 +1241,7 @@

        VARDECL(unsigned char, tmp_data);

        int nb_frames;

        int bak_mode, bak_bandwidth, bak_channels, bak_to_mono;

-       OpusRepacketizer rp;

+       VARDECL(OpusRepacketizer, rp);

        opus_int32 bytes_per_frame;

@@ -1250,7 +1250,8 @@

        ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char);

-       opus_repacketizer_init(&rp);

+       ALLOC(rp, 1, OpusRepacketizer);

+       opus_repacketizer_init(rp);

        bak_mode = st->user_forced_mode;

        bak_bandwidth = st->user_bandwidth;

@@ -1282,7 +1283,7 @@

              RESTORE_STACK;

              return OPUS_INTERNAL_ERROR;

-          ret = opus_repacketizer_cat(&rp, tmp_data+i*bytes_per_frame, tmp_len);

+          ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len);

           if (ret<0)

              RESTORE_STACK;

@@ -1289,7 +1290,7 @@

              return OPUS_INTERNAL_ERROR;

-       ret = opus_repacketizer_out(&rp, data, out_data_bytes);

+       ret = opus_repacketizer_out(rp, data, out_data_bytes);

        if (ret<0)

           RESTORE_STACK;