ref: c152d602aa6f68b4bc9483393985511bb2d83e86
parent: dc58579c2c7e060084554018e9a2e8c25097a255
author: Timothy B. Terriberry <[email protected]>
date: Wed May 8 06:32:37 EDT 2013
Use dynamic stack allocation in the SILK encoder. This makes all remaining large stack allocations use the vararray macros (VARDECL/ALLOC). This continues the work of 6f2d9f50 to allow compiling with NONTHREADSAFE_PSEUDOSTACK, which moves the memory for large buffers off the stack on devices where stack space is very limited. It also does this for some additional large buffers used by the PLC in the decoder.
--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@@ -424,10 +424,12 @@
opus_val16 fade = Q15ONE;
int pitch_index;
VARDECL(opus_val32, etmp);
+ VARDECL(opus_val16, exc);
if (loss_count == 0)
{
- opus_val16 lp_pitch_buf[DECODE_BUFFER_SIZE>>1];
+ VARDECL( opus_val16, lp_pitch_buf );
+ ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
pitch_downsample(decode_mem, lp_pitch_buf, DECODE_BUFFER_SIZE, C);
pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
@@ -440,14 +442,12 @@
}
ALLOC(etmp, overlap, opus_val32);
+ ALLOC(exc, MAX_PERIOD, opus_val16);
window = mode->window;
c=0; do {
- opus_val16 exc[MAX_PERIOD];
- opus_val32 ac[LPC_ORDER+1];
opus_val16 decay;
opus_val16 attenuation;
opus_val32 S1=0;
- opus_val16 lpc_mem[LPC_ORDER];
celt_sig *buf;
int extrapolation_offset;
int extrapolation_len;
@@ -461,6 +461,7 @@
if (loss_count == 0)
{
+ opus_val32 ac[LPC_ORDER+1];
/* Compute LPC coefficients for the last MAX_PERIOD samples before
the first loss so we can work in the excitation-filter domain. */
_celt_autocorr(exc, ac, window, overlap, LPC_ORDER, MAX_PERIOD);
@@ -487,14 +488,17 @@
exc_length = IMIN(2*pitch_index, MAX_PERIOD);
/* Initialize the LPC history with the samples just before the start
of the region for which we're computing the excitation. */
- for (i=0;i<LPC_ORDER;i++)
{
- lpc_mem[i] =
- ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
+ opus_val16 lpc_mem[LPC_ORDER];
+ for (i=0;i<LPC_ORDER;i++)
+ {
+ lpc_mem[i] =
+ ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
+ }
+ /* Compute the excitation for exc_length samples before the loss. */
+ celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
+ exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);
}
- /* Compute the excitation for exc_length samples before the loss. */
- celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
- exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);
/* Check if the waveform is decaying, and if so how fast.
We do this to avoid adding energy when concealing in a segment
@@ -547,14 +551,18 @@
S1 += SHR32(MULT16_16(tmp, tmp), 8);
}
- /* Copy the last decoded samples (prior to the overlap region) to
- synthesis filter memory so we can have a continuous signal. */
- for (i=0;i<LPC_ORDER;i++)
- lpc_mem[i] = ROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);
- /* Apply the synthesis filter to convert the excitation back into the
- signal domain. */
- celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
- buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER, lpc_mem);
+ {
+ opus_val16 lpc_mem[LPC_ORDER];
+ /* Copy the last decoded samples (prior to the overlap region) to
+ synthesis filter memory so we can have a continuous signal. */
+ for (i=0;i<LPC_ORDER;i++)
+ lpc_mem[i] = ROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);
+ /* Apply the synthesis filter to convert the excitation back into
+ the signal domain. */
+ celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
+ buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
+ lpc_mem);
+ }
/* Check if the synthesis energy is higher than expected, which can
happen with the signal changes during our window. If so,
--- a/silk/CNG.c
+++ b/silk/CNG.c
@@ -30,6 +30,7 @@
#endif
#include "main.h"
+#include "stack_alloc.h"
/* Generates excitation for CNG LPC synthesis */
static inline void silk_CNG_exc(
@@ -86,8 +87,8 @@
opus_int i, subfr;
opus_int32 sum_Q6, max_Gain_Q16;
opus_int16 A_Q12[ MAX_LPC_ORDER ];
- opus_int32 CNG_sig_Q10[ MAX_FRAME_LENGTH + MAX_LPC_ORDER ];
silk_CNG_struct *psCNG = &psDec->sCNG;
+ SAVE_STACK;
if( psDec->fs_kHz != psCNG->fs_kHz ) {
/* Reset state */
@@ -123,7 +124,10 @@
/* Add CNG when packet is lost or during DTX */
if( psDec->lossCnt ) {
+ VARDECL( opus_int32, CNG_sig_Q10 );
+ ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 );
+
/* Generate CNG excitation */
silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed );
@@ -164,4 +168,5 @@
} else {
silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order * sizeof( opus_int32 ) );
}
+ RESTORE_STACK;
}
--- a/silk/NLSF_encode.c
+++ b/silk/NLSF_encode.c
@@ -30,6 +30,7 @@
#endif
#include "main.h"
+#include "stack_alloc.h"
/***********************/
/* NLSF vector encoder */
@@ -46,10 +47,10 @@
{
opus_int i, s, ind1, bestIndex, prob_Q8, bits_q7;
opus_int32 W_tmp_Q9;
- opus_int32 err_Q26[ NLSF_VQ_MAX_VECTORS ];
- opus_int32 RD_Q25[ NLSF_VQ_MAX_SURVIVORS ];
- opus_int tempIndices1[ NLSF_VQ_MAX_SURVIVORS ];
- opus_int8 tempIndices2[ NLSF_VQ_MAX_SURVIVORS * MAX_LPC_ORDER ];
+ VARDECL( opus_int32, err_Q26 );
+ VARDECL( opus_int32, RD_Q25 );
+ VARDECL( opus_int, tempIndices1 );
+ VARDECL( opus_int8, tempIndices2 );
opus_int16 res_Q15[ MAX_LPC_ORDER ];
opus_int16 res_Q10[ MAX_LPC_ORDER ];
opus_int16 NLSF_tmp_Q15[ MAX_LPC_ORDER ];
@@ -58,6 +59,7 @@
opus_uint8 pred_Q8[ MAX_LPC_ORDER ];
opus_int16 ec_ix[ MAX_LPC_ORDER ];
const opus_uint8 *pCB_element, *iCDF_ptr;
+ SAVE_STACK;
silk_assert( nSurvivors <= NLSF_VQ_MAX_SURVIVORS );
silk_assert( signalType >= 0 && signalType <= 2 );
@@ -67,11 +69,16 @@
silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order );
/* First stage: VQ */
+ ALLOC( err_Q26, psNLSF_CB->nVectors, opus_int32 );
silk_NLSF_VQ( err_Q26, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->nVectors, psNLSF_CB->order );
/* Sort the quantization errors */
+ ALLOC( tempIndices1, nSurvivors, opus_int );
silk_insertion_sort_increasing( err_Q26, tempIndices1, psNLSF_CB->nVectors, nSurvivors );
+ ALLOC( RD_Q25, nSurvivors, opus_int32 );
+ ALLOC( tempIndices2, nSurvivors * MAX_LPC_ORDER, opus_int8 );
+
/* Loop over survivors */
for( s = 0; s < nSurvivors; s++ ) {
ind1 = tempIndices1[ s ];
@@ -125,4 +132,5 @@
silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB );
return RD_Q25[ 0 ];
+ RESTORE_STACK;
}
--- a/silk/NSQ.c
+++ b/silk/NSQ.c
@@ -30,6 +30,7 @@
#endif
#include "main.h"
+#include "stack_alloc.h"
static inline void silk_nsq_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
@@ -88,11 +89,12 @@
opus_int k, lag, start_idx, LSF_interpolation_flag;
const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13;
opus_int16 *pxq;
- opus_int32 sLTP_Q15[ 2 * MAX_FRAME_LENGTH ];
- opus_int16 sLTP[ 2 * MAX_FRAME_LENGTH ];
+ VARDECL( opus_int32, sLTP_Q15 );
+ VARDECL( opus_int16, sLTP );
opus_int32 HarmShapeFIRPacked_Q14;
opus_int offset_Q10;
- opus_int32 x_sc_Q10[ MAX_SUB_FRAME_LENGTH ];
+ VARDECL( opus_int32, x_sc_Q10 );
+ SAVE_STACK;
NSQ->rand_seed = psIndices->Seed;
@@ -109,6 +111,10 @@
LSF_interpolation_flag = 1;
}
+ ALLOC( sLTP_Q15,
+ psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
+ ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
+ ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
/* Set up pointers to start of sub frame */
NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
@@ -160,6 +166,7 @@
/* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */
silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
+ RESTORE_STACK;
}
/***********************************/
--- a/silk/NSQ_del_dec.c
+++ b/silk/NSQ_del_dec.c
@@ -30,6 +30,7 @@
#endif
#include "main.h"
+#include "stack_alloc.h"
typedef struct {
opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
@@ -54,6 +55,8 @@
opus_int32 LPC_exc_Q14;
} NSQ_sample_struct;
+typedef NSQ_sample_struct NSQ_sample_pair[ 2 ];
+
static inline void silk_nsq_del_dec_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
@@ -123,17 +126,18 @@
{
opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr;
opus_int last_smple_idx, smpl_buf_idx, decisionDelay;
- const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13;
+ const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13;
opus_int16 *pxq;
- opus_int32 sLTP_Q15[ 2 * MAX_FRAME_LENGTH ];
- opus_int16 sLTP[ 2 * MAX_FRAME_LENGTH ];
+ VARDECL( opus_int32, sLTP_Q15 );
+ VARDECL( opus_int16, sLTP );
opus_int32 HarmShapeFIRPacked_Q14;
opus_int offset_Q10;
opus_int32 RDmin_Q10, Gain_Q10;
- opus_int32 x_sc_Q10[ MAX_SUB_FRAME_LENGTH ];
- opus_int32 delayedGain_Q10[ DECISION_DELAY ];
- NSQ_del_dec_struct psDelDec[ MAX_DEL_DEC_STATES ];
+ VARDECL( opus_int32, x_sc_Q10 );
+ VARDECL( opus_int32, delayedGain_Q10 );
+ VARDECL( NSQ_del_dec_struct, psDelDec );
NSQ_del_dec_struct *psDD;
+ SAVE_STACK;
/* Set unvoiced lag to the previous one, overwrite later for voiced */
lag = NSQ->lagPrev;
@@ -141,6 +145,7 @@
silk_assert( NSQ->prev_gain_Q16 != 0 );
/* Initialize delayed decision states */
+ ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct );
silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) );
for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) {
psDD = &psDelDec[ k ];
@@ -175,6 +180,11 @@
LSF_interpolation_flag = 1;
}
+ ALLOC( sLTP_Q15,
+ psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
+ ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
+ ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
+ ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 );
/* Set up pointers to start of sub frame */
pxq = &NSQ->xq[ psEncC->ltp_mem_length ];
NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
@@ -287,6 +297,7 @@
/* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */
silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
+ RESTORE_STACK;
}
/******************************************/
@@ -328,11 +339,13 @@
opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
opus_int32 tmp1, tmp2, sLF_AR_shp_Q14;
opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
- NSQ_sample_struct psSampleState[ MAX_DEL_DEC_STATES ][ 2 ];
+ VARDECL( NSQ_sample_pair, psSampleState );
NSQ_del_dec_struct *psDD;
NSQ_sample_struct *psSS;
+ SAVE_STACK;
silk_assert( nStatesDelayedDecision > 0 );
+ ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );
shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
@@ -614,6 +627,7 @@
psDD = &psDelDec[ k ];
silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
}
+ RESTORE_STACK;
}
static inline void silk_nsq_del_dec_scale_states(
--- a/silk/VAD.c
+++ b/silk/VAD.c
@@ -30,6 +30,7 @@
#endif
#include "main.h"
+#include "stack_alloc.h"
/* Silk VAD noise level estimation */
static inline void silk_VAD_GetNoiseLevels(
@@ -82,15 +83,19 @@
)
{
opus_int SA_Q15, pSNR_dB_Q7, input_tilt;
- opus_int decimated_framelength, dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;
+ opus_int decimated_framelength1, decimated_framelength2;
+ opus_int decimated_framelength;
+ opus_int dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;
opus_int32 sumSquared, smooth_coef_Q16;
opus_int16 HPstateTmp;
- opus_int16 X[ VAD_N_BANDS ][ MAX_FRAME_LENGTH / 2 ];
+ VARDECL( opus_int16, X );
opus_int32 Xnrg[ VAD_N_BANDS ];
opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ];
opus_int32 speech_nrg, x_tmp;
+ opus_int X_offset[ VAD_N_BANDS ];
opus_int ret = 0;
silk_VAD_state *psSilk_VAD = &psEncC->sVAD;
+ SAVE_STACK;
/* Safety checks */
silk_assert( VAD_N_BANDS == 4 );
@@ -101,26 +106,46 @@
/***********************/
/* Filter and Decimate */
/***********************/
+ decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 );
+ decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 );
+ decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );
+ /* Decimate into 4 bands:
+    0       L      3L       L              3L                             5L
+            -      --       -              --                             --
+            8       8       2               4                              4
+
+    [0-1 kHz| temp. |1-2 kHz|    2-4 kHz    |            4-8 kHz           |
+
+    They're arranged to allow the minimal ( frame_length / 4 ) extra
+    scratch space during the downsampling process */
+ X_offset[ 0 ] = 0;
+ X_offset[ 1 ] = decimated_framelength + decimated_framelength2;
+ X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength;
+ X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2;
+ ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 );
+
/* 0-8 kHz to 0-4 kHz and 4-8 kHz */
- silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[ 0 ], &X[ 0 ][ 0 ], &X[ 3 ][ 0 ], psEncC->frame_length );
+ silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[ 0 ],
+ X, &X[ X_offset[ 3 ] ], psEncC->frame_length );
/* 0-4 kHz to 0-2 kHz and 2-4 kHz */
- silk_ana_filt_bank_1( &X[ 0 ][ 0 ], &psSilk_VAD->AnaState1[ 0 ], &X[ 0 ][ 0 ], &X[ 2 ][ 0 ], silk_RSHIFT( psEncC->frame_length, 1 ) );
+ silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ],
+ X, &X[ X_offset[ 2 ] ], decimated_framelength1 );
/* 0-2 kHz to 0-1 kHz and 1-2 kHz */
- silk_ana_filt_bank_1( &X[ 0 ][ 0 ], &psSilk_VAD->AnaState2[ 0 ], &X[ 0 ][ 0 ], &X[ 1 ][ 0 ], silk_RSHIFT( psEncC->frame_length, 2 ) );
+ silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ],
+ X, &X[ X_offset[ 1 ] ], decimated_framelength2 );
/*********************************************/
/* HP filter on lowest band (differentiator) */
/*********************************************/
- decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );
- X[ 0 ][ decimated_framelength - 1 ] = silk_RSHIFT( X[ 0 ][ decimated_framelength - 1 ], 1 );
- HPstateTmp = X[ 0 ][ decimated_framelength - 1 ];
+ X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 );
+ HPstateTmp = X[ decimated_framelength - 1 ];
for( i = decimated_framelength - 1; i > 0; i-- ) {
- X[ 0 ][ i - 1 ] = silk_RSHIFT( X[ 0 ][ i - 1 ], 1 );
- X[ 0 ][ i ] -= X[ 0 ][ i - 1 ];
+ X[ i - 1 ] = silk_RSHIFT( X[ i - 1 ], 1 );
+ X[ i ] -= X[ i - 1 ];
}
- X[ 0 ][ 0 ] -= psSilk_VAD->HPstate;
+ X[ 0 ] -= psSilk_VAD->HPstate;
psSilk_VAD->HPstate = HPstateTmp;
/*************************************/
@@ -142,7 +167,8 @@
for( i = 0; i < dec_subframe_length; i++ ) {
/* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2. */
/* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128) */
- x_tmp = silk_RSHIFT( X[ b ][ i + dec_subframe_offset ], 3 );
+ x_tmp = silk_RSHIFT(
+ X[ X_offset[ b ] + i + dec_subframe_offset ], 3 );
sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp );
/* Safety check */
@@ -264,6 +290,7 @@
}
return( ret );
+ RESTORE_STACK;
}
/**************************/
--- a/silk/control_codec.c
+++ b/silk/control_codec.c
@@ -35,6 +35,7 @@
#include "main_FLP.h"
#define silk_encoder_state_Fxx silk_encoder_state_FLP
#endif
+#include "stack_alloc.h"
#include "tuning_parameters.h"
#include "pitch_est_defines.h"
@@ -138,6 +139,7 @@
{
opus_int ret = SILK_NO_ERROR;
opus_int32 nSamples_temp;
+ SAVE_STACK;
if( psEnc->sCmn.fs_kHz != fs_kHz || psEnc->sCmn.prev_API_fs_Hz != psEnc->sCmn.API_fs_Hz )
{
@@ -145,35 +147,38 @@
/* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, fs_kHz * 1000, 1 );
} else {
- /* Allocate worst case space for temporary upsampling, 8 to 48 kHz, so a factor 6 */
- opus_int16 x_buf_API_fs_Hz[ ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * MAX_API_FS_KHZ ];
- silk_resampler_state_struct temp_resampler_state;
+ VARDECL( opus_int16, x_buf_API_fs_Hz );
+ VARDECL( silk_resampler_state_struct, temp_resampler_state );
#ifdef FIXED_POINT
opus_int16 *x_bufFIX = psEnc->x_buf;
#else
- opus_int16 x_bufFIX[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];
+ VARDECL( opus_int16, x_bufFIX );
#endif
+ opus_int32 nAPI_Samples_temp;
nSamples_temp = silk_LSHIFT( psEnc->sCmn.frame_length, 1 ) + LA_SHAPE_MS * psEnc->sCmn.fs_kHz;
#ifndef FIXED_POINT
+ ALLOC( x_bufFIX, nSamples_temp, opus_int16 );
silk_float2short_array( x_bufFIX, psEnc->x_buf, nSamples_temp );
#endif
/* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */
- ret += silk_resampler_init( &temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 );
+ ALLOC( temp_resampler_state, 1, silk_resampler_state_struct );
+ ret += silk_resampler_init( temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 );
+ /* Calculate number of samples to temporarily upsample */
+ nAPI_Samples_temp = silk_DIV32_16( nSamples_temp * psEnc->sCmn.API_fs_Hz, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ) );
+
/* Temporary resampling of x_buf data to API_fs_Hz */
- ret += silk_resampler( &temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, nSamples_temp );
+ ALLOC( x_buf_API_fs_Hz, nAPI_Samples_temp, opus_int16 );
+ ret += silk_resampler( temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, nSamples_temp );
- /* Calculate number of samples that has been temporarily upsampled */
- nSamples_temp = silk_DIV32_16( nSamples_temp * psEnc->sCmn.API_fs_Hz, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ) );
-
/* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ), 1 );
/* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */
- ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, nSamples_temp );
+ ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, nAPI_Samples_temp );
#ifndef FIXED_POINT
silk_short2float_array( psEnc->x_buf, x_bufFIX, ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * fs_kHz );
@@ -183,6 +188,7 @@
psEnc->sCmn.prev_API_fs_Hz = psEnc->sCmn.API_fs_Hz;
+ RESTORE_STACK;
return ret;
}
--- a/silk/enc_API.c
+++ b/silk/enc_API.c
@@ -32,6 +32,7 @@
#include "API.h"
#include "control.h"
#include "typedef.h"
+#include "stack_alloc.h"
#include "structs.h"
#include "tuning_parameters.h"
#ifdef FIXED_POINT
@@ -146,12 +147,14 @@
)
{
opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;
- opus_int nSamplesToBuffer, nBlocksOf10ms, nSamplesFromInput = 0;
+ opus_int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms;
+ opus_int nSamplesFromInput = 0, nSamplesFromInputMax;
opus_int speech_act_thr_for_switch_Q8;
opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum;
silk_encoder *psEnc = ( silk_encoder * )encState;
- opus_int16 buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ ];
+ VARDECL( opus_int16, buf );
opus_int transition, curr_block, tot_blocks;
+ SAVE_STACK;
psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;
@@ -158,6 +161,7 @@
/* Check values in encoder control structure */
if( ( ret = check_control_input( encControl ) != 0 ) ) {
silk_assert( 0 );
+ RESTORE_STACK;
return ret;
}
@@ -192,6 +196,7 @@
/* Only accept input length of 10 ms */
if( nBlocksOf10ms != 1 ) {
silk_assert( 0 );
+ RESTORE_STACK;
return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
}
/* Reset Encoder */
@@ -212,11 +217,13 @@
/* Only accept input lengths that are a multiple of 10 ms */
if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) {
silk_assert( 0 );
+ RESTORE_STACK;
return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
}
/* Make sure no more than one packet can be produced */
if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) {
silk_assert( 0 );
+ RESTORE_STACK;
return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
}
}
@@ -227,6 +234,7 @@
opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0;
if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, TargetRate_bps, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) {
silk_assert( 0 );
+ RESTORE_STACK;
return ret;
}
if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) {
@@ -239,9 +247,16 @@
silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
/* Input buffering/resampling and encoding */
+ nSamplesToBufferMax =
+ 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz;
+ nSamplesFromInputMax =
+ silk_DIV32_16( nSamplesToBufferMax *
+ psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz,
+ psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
+ ALLOC( buf, nSamplesFromInputMax, opus_int16 );
while( 1 ) {
nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx;
- nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz );
+ nSamplesToBuffer = silk_min( nSamplesToBuffer, nSamplesToBufferMax );
nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
/* Resample and write to buffer */
if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
@@ -530,6 +545,7 @@
}
}
+ RESTORE_STACK;
return ret;
}
--- a/silk/encode_pulses.c
+++ b/silk/encode_pulses.c
@@ -30,6 +30,7 @@
#endif
#include "main.h"
+#include "stack_alloc.h"
/*********************************************/
/* Encode quantization indices of excitation */
@@ -66,14 +67,15 @@
{
opus_int i, k, j, iter, bit, nLS, scale_down, RateLevelIndex = 0;
opus_int32 abs_q, minSumBits_Q5, sumBits_Q5;
- opus_int abs_pulses[ MAX_FRAME_LENGTH ];
- opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ];
- opus_int nRshifts[ MAX_NB_SHELL_BLOCKS ];
+ VARDECL( opus_int, abs_pulses );
+ VARDECL( opus_int, sum_pulses );
+ VARDECL( opus_int, nRshifts );
opus_int pulses_comb[ 8 ];
opus_int *abs_pulses_ptr;
const opus_int8 *pulses_ptr;
const opus_uint8 *cdf_ptr;
const opus_uint8 *nBits_ptr;
+ SAVE_STACK;
silk_memset( pulses_comb, 0, 8 * sizeof( opus_int ) ); /* Fixing Valgrind reported problem*/
@@ -90,6 +92,8 @@
}
/* Take the absolute value of the pulses */
+ ALLOC( abs_pulses, iter * SHELL_CODEC_FRAME_LENGTH, opus_int );
+ silk_assert( !( SHELL_CODEC_FRAME_LENGTH & 3 ) );
for( i = 0; i < iter * SHELL_CODEC_FRAME_LENGTH; i+=4 ) {
abs_pulses[i+0] = ( opus_int )silk_abs( pulses[ i + 0 ] );
abs_pulses[i+1] = ( opus_int )silk_abs( pulses[ i + 1 ] );
@@ -98,6 +102,8 @@
}
/* Calc sum pulses per shell code frame */
+ ALLOC( sum_pulses, iter, opus_int );
+ ALLOC( nRshifts, iter, opus_int );
abs_pulses_ptr = abs_pulses;
for( i = 0; i < iter; i++ ) {
nRshifts[ i ] = 0;
@@ -196,4 +202,5 @@
/* Encode signs */
/****************/
silk_encode_signs( psRangeEnc, pulses, frame_length, signalType, quantOffsetType, sum_pulses );
+ RESTORE_STACK;
}
--- a/silk/fixed/encode_frame_FIX.c
+++ b/silk/fixed/encode_frame_FIX.c
@@ -30,6 +30,7 @@
#endif
#include "main_FIX.h"
+#include "stack_alloc.h"
#include "tuning_parameters.h"
/* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */
@@ -84,9 +85,7 @@
{
silk_encoder_control_FIX sEncCtrl;
opus_int i, iter, maxIter, found_upper, found_lower, ret = 0;
- opus_int16 *x_frame, *res_pitch_frame;
- opus_int32 xfw_Q3[ MAX_FRAME_LENGTH ];
- opus_int16 res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
+ opus_int16 *x_frame;
ec_enc sRangeEnc_copy, sRangeEnc_copy2;
silk_nsq_state sNSQ_copy, sNSQ_copy2;
opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
@@ -95,7 +94,7 @@
opus_int16 ec_prevLagIndex_copy;
opus_int ec_prevSignalType_copy;
opus_int8 LastGainIndex_copy2;
- opus_uint8 ec_buf_copy[ 1275 ];
+ SAVE_STACK;
/* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
@@ -105,9 +104,8 @@
/**************************************************************/
/* Set up Input Pointers, and insert frame in input buffer */
/*************************************************************/
- /* pointers aligned with start of frame to encode */
- x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; /* start of frame to encode */
- res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; /* start of pitch LPC residual frame */
+ /* start of frame to encode */
+ x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;
/***************************************/
/* Ensure smooth bandwidth transitions */
@@ -120,6 +118,17 @@
silk_memcpy( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length * sizeof( opus_int16 ) );
if( !psEnc->sCmn.prefillFlag ) {
+ VARDECL( opus_int32, xfw_Q3 );
+ VARDECL( opus_int16, res_pitch );
+ VARDECL( opus_uint8, ec_buf_copy );
+ opus_int16 *res_pitch_frame;
+
+ ALLOC( res_pitch,
+ psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length
+ + psEnc->sCmn.ltp_mem_length, opus_int16 );
+ /* start of pitch LPC residual frame */
+ res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length;
+
/*****************************************/
/* Find pitch lags, initial LPC analysis */
/*****************************************/
@@ -143,6 +152,7 @@
/*****************************************/
/* Prefiltering for noise shaper */
/*****************************************/
+ ALLOC( xfw_Q3, psEnc->sCmn.frame_length, opus_int32 );
silk_prefilter_FIX( psEnc, &sEncCtrl, xfw_Q3, x_frame );
/****************************************/
@@ -164,6 +174,7 @@
seed_copy = psEnc->sCmn.indices.Seed;
ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
+ ALLOC( ec_buf_copy, 1275, opus_uint8 );
for( iter = 0; ; iter++ ) {
if( gainsID == gainsID_lower ) {
nBits = nBits_lower;
@@ -299,6 +310,7 @@
if( psEnc->sCmn.prefillFlag ) {
/* No payload */
*pnBytesOut = 0;
+ RESTORE_STACK;
return ret;
}
@@ -309,6 +321,7 @@
/* Payload size */
*pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
+ RESTORE_STACK;
return ret;
}
--- a/silk/fixed/find_LPC_FIX.c
+++ b/silk/fixed/find_LPC_FIX.c
@@ -30,6 +30,7 @@
#endif
#include "main_FIX.h"
+#include "stack_alloc.h"
#include "tuning_parameters.h"
/* Finds LPC vector from correlations, and converts to NLSF */
@@ -51,7 +52,7 @@
opus_int res_nrg_interp_Q, res_nrg_Q, res_tmp_nrg_Q;
opus_int16 a_tmp_Q12[ MAX_LPC_ORDER ];
opus_int16 NLSF0_Q15[ MAX_LPC_ORDER ];
- opus_int16 LPC_res[ MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ];
+ SAVE_STACK;
subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder;
@@ -62,6 +63,8 @@
silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder );
if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) {
+ VARDECL( opus_int16, LPC_res );
+
/* Optimal solution for last 10 ms */
silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder );
@@ -81,6 +84,8 @@
/* Convert to NLSFs */
silk_A2NLSF( NLSF_Q15, a_tmp_Q16, psEncC->predictLPCOrder );
+ ALLOC( LPC_res, 2 * subfr_length, opus_int16 );
+
/* Search over interpolation indices to find the one with lowest residual energy */
for( k = 3; k >= 0; k-- ) {
/* Interpolate NLSFs for first half */
@@ -142,4 +147,5 @@
}
silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) );
+ RESTORE_STACK;
}
--- a/silk/fixed/find_pitch_lags_FIX.c
+++ b/silk/fixed/find_pitch_lags_FIX.c
@@ -30,6 +30,7 @@
#endif
#include "main_FIX.h"
+#include "stack_alloc.h"
#include "tuning_parameters.h"
/* Find pitch lags */
@@ -43,11 +44,13 @@
opus_int buf_len, i, scale;
opus_int32 thrhld_Q13, res_nrg;
const opus_int16 *x_buf, *x_buf_ptr;
- opus_int16 Wsig[ FIND_PITCH_LPC_WIN_MAX ], *Wsig_ptr;
+ VARDECL( opus_int16, Wsig );
+ opus_int16 *Wsig_ptr;
opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ];
opus_int16 rc_Q15[ MAX_FIND_PITCH_LPC_ORDER ];
opus_int32 A_Q24[ MAX_FIND_PITCH_LPC_ORDER ];
opus_int16 A_Q12[ MAX_FIND_PITCH_LPC_ORDER ];
+ SAVE_STACK;
/******************************************/
/* Set up buffer lengths etc based on Fs */
@@ -65,6 +68,8 @@
/* Calculate windowed signal */
+ ALLOC( Wsig, psEnc->sCmn.pitch_LPC_win_length, opus_int16 );
+
/* First LA_LTP samples */
x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length;
Wsig_ptr = Wsig;
@@ -134,4 +139,5 @@
psEnc->sCmn.indices.contourIndex = 0;
psEnc->LTPCorr_Q15 = 0;
}
+ RESTORE_STACK;
}
--- a/silk/fixed/find_pred_coefs_FIX.c
+++ b/silk/fixed/find_pred_coefs_FIX.c
@@ -30,6 +30,7 @@
#endif
#include "main_FIX.h"
+#include "stack_alloc.h"
void silk_find_pred_coefs_FIX(
silk_encoder_state_FIX *psEnc, /* I/O encoder state */
@@ -40,13 +41,14 @@
)
{
opus_int i;
- opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ];
opus_int32 invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ], Wght_Q15[ MAX_NB_SUBFR ];
opus_int16 NLSF_Q15[ MAX_LPC_ORDER ];
const opus_int16 *x_ptr;
- opus_int16 *x_pre_ptr, LPC_in_pre[ MAX_NB_SUBFR * MAX_LPC_ORDER + MAX_FRAME_LENGTH ];
+ opus_int16 *x_pre_ptr;
+ VARDECL( opus_int16, LPC_in_pre );
opus_int32 tmp, min_gain_Q16, minInvGain_Q30;
opus_int LTP_corrs_rshift[ MAX_NB_SUBFR ];
+ SAVE_STACK;
/* weighting for weighted least squares */
min_gain_Q16 = silk_int32_MAX >> 6;
@@ -71,12 +73,19 @@
local_gains[ i ] = silk_DIV32( ( (opus_int32)1 << 16 ), invGains_Q16[ i ] );
}
+ ALLOC( LPC_in_pre,
+ psEnc->sCmn.nb_subfr * psEnc->sCmn.predictLPCOrder
+ + psEnc->sCmn.frame_length, opus_int16 );
if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ VARDECL( opus_int32, WLTP );
+
/**********/
/* VOICED */
/**********/
silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );
+ ALLOC( WLTP, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 );
+
/* LTP analysis */
silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7,
res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length,
@@ -133,4 +142,5 @@
/* Copy to prediction struct for use in next frame for interpolation */
silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
+ RESTORE_STACK;
}
--- a/silk/fixed/noise_shape_analysis_FIX.c
+++ b/silk/fixed/noise_shape_analysis_FIX.c
@@ -30,6 +30,7 @@
#endif
#include "main_FIX.h"
+#include "stack_alloc.h"
#include "tuning_parameters.h"
/* Compute gain to make warped filter coefficients have a zero mean log frequency response on a */
@@ -156,8 +157,9 @@
opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ];
opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ];
opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ];
- opus_int16 x_windowed[ SHAPE_LPC_WIN_MAX ];
+ VARDECL( opus_int16, x_windowed );
const opus_int16 *x_ptr, *pitch_res_ptr;
+ SAVE_STACK;
/* Point to start of first LPC analysis block */
x_ptr = x - psEnc->sCmn.la_shape;
@@ -258,6 +260,7 @@
/********************************************/
/* Compute noise shaping AR coefs and gains */
/********************************************/
+ ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 );
for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
/* Apply window: sine slope followed by flat part followed by cosine slope */
opus_int shift, slope_part, flat_part;
@@ -437,4 +440,5 @@
psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 );
psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 );
}
+ RESTORE_STACK;
}
--- a/silk/fixed/pitch_analysis_core_FIX.c
+++ b/silk/fixed/pitch_analysis_core_FIX.c
@@ -34,15 +34,29 @@
********************************************************** */
#include "SigProc_FIX.h"
#include "pitch_est_defines.h"
+#include "stack_alloc.h"
#include "debug.h"
#define SCRATCH_SIZE 22
+#define SF_LENGTH_4KHZ ( PE_SUBFR_LENGTH_MS * 4 ) /* Subframe length in samples at 4 kHz */
+#define SF_LENGTH_8KHZ ( PE_SUBFR_LENGTH_MS * 8 ) /* Subframe length in samples at 8 kHz */
+#define MIN_LAG_4KHZ ( PE_MIN_LAG_MS * 4 ) /* Smallest lag searched at 4 kHz, in samples */
+#define MIN_LAG_8KHZ ( PE_MIN_LAG_MS * 8 ) /* Smallest lag searched at 8 kHz, in samples */
+#define MAX_LAG_4KHZ ( PE_MAX_LAG_MS * 4 ) /* Largest lag searched at 4 kHz, in samples */
+#define MAX_LAG_8KHZ ( PE_MAX_LAG_MS * 8 - 1 ) /* Largest lag searched at 8 kHz, in samples */
+#define CSTRIDE_4KHZ ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ ) /* Row stride of C in stage 1: one entry per lag in [MIN_LAG_4KHZ, MAX_LAG_4KHZ] */
+#define CSTRIDE_8KHZ ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) ) /* Row stride of C in stage 2: columns are indexed by lag - ( MIN_LAG_8KHZ - 2 ) */
+#define D_COMP_MIN ( MIN_LAG_8KHZ - 3 ) /* Lag stored at d_comp[ 0 ]; d_comp is indexed by lag - D_COMP_MIN */
+#define D_COMP_MAX ( MAX_LAG_8KHZ + 4 ) /* Exclusive upper lag bound covered by d_comp */
+#define D_COMP_STRIDE ( D_COMP_MAX - D_COMP_MIN ) /* Number of entries allocated for d_comp */
+typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ]; /* One row of stage 3 values, one per stage 3 lag; element type of the stage 3 arrays */
+
/************************************************************/
/* Internally used functions */
/************************************************************/
static void silk_P_Ana_calc_corr_st3(
- opus_int32 cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* O 3 DIM correlation array */
+ silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */
const opus_int16 frame[], /* I vector to correlate */
opus_int start_lag, /* I lag offset to search around */
opus_int sf_length, /* I length of a 5 ms subframe */
@@ -51,7 +65,7 @@
);
static void silk_P_Ana_calc_energy_st3(
- opus_int32 energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* O 3 DIM energy array */
+ silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */
const opus_int16 frame[], /* I vector to calc energy in */
opus_int start_lag, /* I lag offset to search around */
opus_int sf_length, /* I length of one 5 ms subframe */
@@ -76,30 +90,30 @@
const opus_int nb_subfr /* I number of 5 ms subframes */
)
{
- opus_int16 frame_8kHz[ PE_MAX_FRAME_LENGTH_ST_2 ];
- opus_int16 frame_4kHz[ PE_MAX_FRAME_LENGTH_ST_1 ];
+ VARDECL( opus_int16, frame_8kHz );
+ VARDECL( opus_int16, frame_4kHz );
opus_int32 filt_state[ 6 ];
- opus_int32 scratch_mem[ 3 * PE_MAX_FRAME_LENGTH ];
- opus_int16 *input_frame_ptr;
+ const opus_int16 *input_frame_ptr;
opus_int i, k, d, j;
- opus_int16 C[ PE_MAX_NB_SUBFR ][ ( PE_MAX_LAG >> 1 ) + 5 ];
+ VARDECL( opus_int16, C );
const opus_int16 *target_ptr, *basis_ptr;
opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;
opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;
- opus_int16 d_comp[ ( PE_MAX_LAG >> 1 ) + 5 ];
+ VARDECL( opus_int16, d_comp );
opus_int32 sum, threshold, lag_counter;
opus_int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;
- opus_int32 energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];
- opus_int32 crosscorr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];
+ VARDECL( silk_pe_stage3_vals, energies_st3 );
+ VARDECL( silk_pe_stage3_vals, cross_corr_st3 );
opus_int frame_length, frame_length_8kHz, frame_length_4kHz;
- opus_int sf_length, sf_length_8kHz, sf_length_4kHz;
- opus_int min_lag, min_lag_8kHz, min_lag_4kHz;
- opus_int max_lag, max_lag_8kHz, max_lag_4kHz;
+ opus_int sf_length;
+ opus_int min_lag;
+ opus_int max_lag;
opus_int32 contour_bias_Q15, diff;
opus_int nb_cbk_search, cbk_size;
opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
const opus_int8 *Lag_CB_ptr;
+ SAVE_STACK;
/* Check for valid sampling frequency */
silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
@@ -115,16 +129,11 @@
frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
sf_length = PE_SUBFR_LENGTH_MS * Fs_kHz;
- sf_length_4kHz = PE_SUBFR_LENGTH_MS * 4;
- sf_length_8kHz = PE_SUBFR_LENGTH_MS * 8;
min_lag = PE_MIN_LAG_MS * Fs_kHz;
- min_lag_4kHz = PE_MIN_LAG_MS * 4;
- min_lag_8kHz = PE_MIN_LAG_MS * 8;
max_lag = PE_MAX_LAG_MS * Fs_kHz - 1;
- max_lag_4kHz = PE_MAX_LAG_MS * 4;
- max_lag_8kHz = PE_MAX_LAG_MS * 8 - 1;
/* Resample from input sampled at Fs_kHz to 8 kHz */
+ ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 );
if( Fs_kHz == 16 ) {
silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length );
@@ -138,6 +147,7 @@
/* Decimate again to 4 kHz */
silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */
+ ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 );
silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
/* Low-pass filter */
@@ -162,61 +172,65 @@
/******************************************************************************
* FIRST STAGE, operating in 4 khz
******************************************************************************/
- silk_memset( C, 0, sizeof( opus_int16 ) * nb_subfr * ( ( PE_MAX_LAG >> 1 ) + 5) );
- target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ];
+ ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );
+ silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );
+ target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];
for( k = 0; k < nb_subfr >> 1; k++ ) {
/* Check that we are within range of the array */
silk_assert( target_ptr >= frame_4kHz );
- silk_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+ silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
- basis_ptr = target_ptr - min_lag_4kHz;
+ basis_ptr = target_ptr - MIN_LAG_4KHZ;
/* Check that we are within range of the array */
silk_assert( basis_ptr >= frame_4kHz );
- silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+ silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
/* Calculate first vector products before loop */
- cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );
- normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, sf_length_8kHz );
- normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length_8kHz ) );
- normalizer = silk_ADD32( normalizer, silk_SMULBB( sf_length_8kHz, 4000 ) );
+ cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );
+ normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ );
+ normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ ) );
+ normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
- C[ k ][ min_lag_4kHz ] = (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */
+ matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
+ (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */
/* From now on normalizer is computed recursively */
- for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) {
+ for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) {
basis_ptr--;
/* Check that we are within range of the array */
silk_assert( basis_ptr >= frame_4kHz );
- silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+ silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
- cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );
+ cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );
/* Add contribution of new sample and remove contribution from oldest sample */
normalizer = silk_ADD32( normalizer,
silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -
- silk_SMULBB( basis_ptr[ sf_length_8kHz ], basis_ptr[ sf_length_8kHz ] ) );
+ silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) );
- C[ k ][ d ] = (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */
+ matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) =
+ (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */
}
/* Update target pointer */
- target_ptr += sf_length_8kHz;
+ target_ptr += SF_LENGTH_8KHZ;
}
/* Combine two subframes into single correlation measure and apply short-lag bias */
if( nb_subfr == PE_MAX_NB_SUBFR ) {
- for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) {
- sum = (opus_int32)C[ 0 ][ i ] + (opus_int32) C[ 1 ][ i ]; /* Q14 */
+ for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
+ sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ )
+ + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ ); /* Q14 */
sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */
- C[ 0 ][ i ] = (opus_int16)sum; /* Q14 */
+ C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */
}
} else {
/* Only short-lag bias */
- for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) {
- sum = silk_LSHIFT( (opus_int32)C[ 0 ][ i ], 1 ); /* Q14 */
+ for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
+ sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 ); /* Q14 */
sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */
- C[ 0 ][ i ] = (opus_int16)sum; /* Q14 */
+ C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */
}
}
@@ -223,15 +237,17 @@
/* Sort */
length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );
silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
- silk_insertion_sort_decreasing_int16( &C[ 0 ][ min_lag_4kHz ], d_srch, max_lag_4kHz - min_lag_4kHz + 1, length_d_srch );
+ silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ,
+ length_d_srch );
/* Escape if correlation is very low already here */
- Cmax = (opus_int)C[ 0 ][ min_lag_4kHz ]; /* Q14 */
+ Cmax = (opus_int)C[ 0 ]; /* Q14 */
if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {
silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
*LTPCorr_Q15 = 0;
*lagIndex = 0;
*contourIndex = 0;
+ RESTORE_STACK;
return 1;
}
@@ -238,8 +254,8 @@
threshold = silk_SMULWB( search_thres1_Q16, Cmax );
for( i = 0; i < length_d_srch; i++ ) {
/* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
- if( C[ 0 ][ min_lag_4kHz + i ] > threshold ) {
- d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + min_lag_4kHz, 1 );
+ if( C[ i ] > threshold ) {
+ d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 );
} else {
length_d_srch = i;
break;
@@ -247,21 +263,23 @@
}
silk_assert( length_d_srch > 0 );
- for( i = min_lag_8kHz - 5; i < max_lag_8kHz + 5; i++ ) {
- d_comp[ i ] = 0;
+ ALLOC( d_comp, D_COMP_STRIDE, opus_int16 );
+ for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) {
+ d_comp[ i - D_COMP_MIN ] = 0;
}
for( i = 0; i < length_d_srch; i++ ) {
- d_comp[ d_srch[ i ] ] = 1;
+ d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1;
}
/* Convolution */
- for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) {
- d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ];
+ for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
+ d_comp[ i - D_COMP_MIN ] +=
+ d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ];
}
length_d_srch = 0;
- for( i = min_lag_8kHz; i < max_lag_8kHz + 1; i++ ) {
- if( d_comp[ i + 1 ] > 0 ) {
+ for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) {
+ if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) {
d_srch[ length_d_srch ] = i;
length_d_srch++;
}
@@ -268,13 +286,14 @@
}
/* Convolution */
- for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) {
- d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ] + d_comp[ i - 3 ];
+ for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
+ d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ]
+ + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ];
}
length_d_comp = 0;
- for( i = min_lag_8kHz; i < max_lag_8kHz + 4; i++ ) {
- if( d_comp[ i ] > 0 ) {
+ for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) {
+ if( d_comp[ i - D_COMP_MIN ] > 0 ) {
d_comp[ length_d_comp ] = i - 2;
length_d_comp++;
}
@@ -299,7 +318,7 @@
/*********************************************************************************
* Find energy of each subframe projected onto its history, for a range of delays
*********************************************************************************/
- silk_memset( C, 0, PE_MAX_NB_SUBFR * ( ( PE_MAX_LAG >> 1 ) + 5 ) * sizeof( opus_int16 ) );
+ silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) );
target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
for( k = 0; k < nb_subfr; k++ ) {
@@ -306,9 +325,9 @@
/* Check that we are within range of the array */
silk_assert( target_ptr >= frame_8kHz );
- silk_assert( target_ptr + sf_length_8kHz <= frame_8kHz + frame_length_8kHz );
+ silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
- energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, sf_length_8kHz ), 1 );
+ energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ), 1 );
for( j = 0; j < length_d_comp; j++ ) {
d = d_comp[ j ];
basis_ptr = target_ptr - d;
@@ -315,17 +334,21 @@
/* Check that we are within range of the array */
silk_assert( basis_ptr >= frame_8kHz );
- silk_assert( basis_ptr + sf_length_8kHz <= frame_8kHz + frame_length_8kHz );
+ silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
- cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );
+ cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );
if( cross_corr > 0 ) {
- energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length_8kHz );
- C[ k ][ d ] = (opus_int16)silk_DIV32_varQ( cross_corr, silk_ADD32( energy_target, energy_basis ), 13 + 1 ); /* Q13 */
+ energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ );
+ matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
+ (opus_int16)silk_DIV32_varQ( cross_corr,
+ silk_ADD32( energy_target,
+ energy_basis ),
+ 13 + 1 ); /* Q13 */
} else {
- C[ k ][ d ] = 0;
+ matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0;
}
}
- target_ptr += sf_length_8kHz;
+ target_ptr += SF_LENGTH_8KHZ;
}
/* search over lag range and lags codebook */
@@ -369,8 +392,13 @@
for( j = 0; j < nb_cbk_search; j++ ) {
CC[ j ] = 0;
for( i = 0; i < nb_subfr; i++ ) {
+ opus_int d_subfr;
/* Try all codebooks */
- CC[ j ] = CC[ j ] + (opus_int32)C[ i ][ d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size ) ];
+ d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size );
+ CC[ j ] = CC[ j ]
+ + (opus_int32)matrix_ptr( C, i,
+ d_subfr - ( MIN_LAG_8KHZ - 2 ),
+ CSTRIDE_8KHZ );
}
}
/* Find best codebook */
@@ -402,7 +430,7 @@
if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */
CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 ) && /* Correlation needs to be high enough to be voiced */
- silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= min_lag_8kHz /* Lag must be in range */
+ silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ /* Lag must be in range */
) {
CCmax_b = CCmax_new_b;
CCmax = CCmax_new;
@@ -417,6 +445,7 @@
*LTPCorr_Q15 = 0;
*lagIndex = 0;
*contourIndex = 0;
+ RESTORE_STACK;
return 1;
}
@@ -425,21 +454,22 @@
silk_assert( *LTPCorr_Q15 >= 0 );
if( Fs_kHz > 8 ) {
+ VARDECL( opus_int16, scratch_mem );
/***************************************************************************/
/* Scale input signal down to avoid correlations measures from overflowing */
/***************************************************************************/
/* find scaling as max scaling for each subframe */
silk_sum_sqr_shift( &energy, &shift, frame, frame_length );
+ ALLOC( scratch_mem, shift > 0 ? frame_length : 0, opus_int16 );
if( shift > 0 ) {
/* Move signal to scratch mem because the input signal should be unchanged */
- /* Reuse the 32 bit scratch mem vector, use a 16 bit pointer from now */
shift = silk_RSHIFT( shift, 1 );
- input_frame_ptr = (opus_int16*)scratch_mem;
for( i = 0; i < frame_length; i++ ) {
- input_frame_ptr[ i ] = silk_RSHIFT( frame[ i ], shift );
+ scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift );
}
+ input_frame_ptr = scratch_mem;
} else {
- input_frame_ptr = (opus_int16*)frame;
+ input_frame_ptr = frame;
}
/* Search in original signal */
@@ -466,14 +496,7 @@
for( k = 0; k < nb_subfr; k++ ) {
pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ];
}
- /* Calculate the correlations and energies needed in stage 3 */
- silk_P_Ana_calc_corr_st3( crosscorr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
- silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
- lag_counter = 0;
- silk_assert( lag == silk_SAT16( lag ) );
- contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
-
/* Set up codebook parameters according to complexity setting and frame length */
if( nb_subfr == PE_MAX_NB_SUBFR ) {
nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
@@ -485,6 +508,16 @@
Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
}
+ /* Calculate the correlations and energies needed in stage 3 */
+ ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
+ ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
+ silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
+ silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
+
+ lag_counter = 0;
+ silk_assert( lag == silk_SAT16( lag ) );
+ contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
+
target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 );
for( d = start_lag; d <= end_lag; d++ ) {
@@ -492,8 +525,12 @@
cross_corr = 0;
energy = energy_target;
for( k = 0; k < nb_subfr; k++ ) {
- cross_corr = silk_ADD32( cross_corr, crosscorr_st3[ k ][ j ][ lag_counter ] );
- energy = silk_ADD32( energy, energies_st3[ k ][ j ][ lag_counter ] );
+ cross_corr = silk_ADD32( cross_corr,
+ matrix_ptr( cross_corr_st3, k, j,
+ nb_cbk_search )[ lag_counter ] );
+ energy = silk_ADD32( energy,
+ matrix_ptr( energies_st3, k, j,
+ nb_cbk_search )[ lag_counter ] );
silk_assert( energy >= 0 );
}
if( cross_corr > 0 ) {
@@ -525,13 +562,14 @@
/* Save Lags */
for( k = 0; k < nb_subfr; k++ ) {
pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
- pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 );
+ pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 );
}
- *lagIndex = (opus_int16)( lag - min_lag_8kHz );
+ *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ );
*contourIndex = (opus_int8)CBimax;
}
silk_assert( *lagIndex >= 0 );
/* return as voiced */
+ RESTORE_STACK;
return 0;
}
@@ -549,7 +587,7 @@
* case 4*12*5 = 240 correlations, but more likely around 120.
***********************************************************************/
static void silk_P_Ana_calc_corr_st3(
- opus_int32 cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* O 3 DIM correlation array */
+ silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */
const opus_int16 frame[], /* I vector to correlate */
opus_int start_lag, /* I lag offset to search around */
opus_int sf_length, /* I length of a 5 ms subframe */
@@ -561,8 +599,9 @@
opus_int32 cross_corr;
opus_int i, j, k, lag_counter, lag_low, lag_high;
opus_int nb_cbk_search, delta, idx, cbk_size;
- opus_int32 scratch_mem[ SCRATCH_SIZE ];
+ VARDECL( opus_int32, scratch_mem );
const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
+ SAVE_STACK;
silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
@@ -579,6 +618,7 @@
nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
cbk_size = PE_NB_CBKS_STAGE3_10MS;
}
+ ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
for( k = 0; k < nb_subfr; k++ ) {
@@ -603,11 +643,13 @@
for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
silk_assert( idx + j < SCRATCH_SIZE );
silk_assert( idx + j < lag_counter );
- cross_corr_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ];
+ matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] =
+ scratch_mem[ idx + j ];
}
}
target_ptr += sf_length;
}
+ RESTORE_STACK;
}
/********************************************************************/
@@ -615,7 +657,7 @@
/* calculated recursively. */
/********************************************************************/
static void silk_P_Ana_calc_energy_st3(
- opus_int32 energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM energy array */
+ silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */
const opus_int16 frame[], /* I vector to calc energy in */
opus_int start_lag, /* I lag offset to search around */
opus_int sf_length, /* I length of one 5 ms subframe */
@@ -627,8 +669,9 @@
opus_int32 energy;
opus_int k, i, j, lag_counter;
opus_int nb_cbk_search, delta, idx, cbk_size, lag_diff;
- opus_int32 scratch_mem[ SCRATCH_SIZE ];
+ VARDECL( opus_int32, scratch_mem );
const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
+ SAVE_STACK;
silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
@@ -645,6 +688,8 @@
nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
cbk_size = PE_NB_CBKS_STAGE3_10MS;
}
+ ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
+
target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
for( k = 0; k < nb_subfr; k++ ) {
lag_counter = 0;
@@ -678,10 +723,13 @@
for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
silk_assert( idx + j < SCRATCH_SIZE );
silk_assert( idx + j < lag_counter );
- energies_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ];
- silk_assert( energies_st3[ k ][ i ][ j ] >= 0 );
+ matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] =
+ scratch_mem[ idx + j ];
+ silk_assert(
+ matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 );
}
}
target_ptr += sf_length;
}
+ RESTORE_STACK;
}
--- a/silk/fixed/prefilter_FIX.c
+++ b/silk/fixed/prefilter_FIX.c
@@ -30,6 +30,7 @@
#endif
#include "main_FIX.h"
+#include "stack_alloc.h"
#include "tuning_parameters.h"
/* Prefilter for finding Quantizer input signal */
@@ -101,14 +102,17 @@
opus_int32 *pxw_Q3;
opus_int HarmShapeGain_Q12, Tilt_Q14;
opus_int32 HarmShapeFIRPacked_Q12, LF_shp_Q14;
- opus_int32 x_filt_Q12[ MAX_SUB_FRAME_LENGTH ];
- opus_int32 st_res_Q2[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ];
+ VARDECL( opus_int32, x_filt_Q12 );
+ VARDECL( opus_int32, st_res_Q2 );
opus_int16 B_Q10[ 2 ];
+ SAVE_STACK;
/* Set up pointers */
px = x;
pxw_Q3 = xw_Q3;
lag = P->lagPrev;
+ ALLOC( x_filt_Q12, psEnc->sCmn.subfr_length, opus_int32 );
+ ALLOC( st_res_Q2, psEnc->sCmn.subfr_length, opus_int32 );
for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
/* Update Variables that change per sub frame */
if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
@@ -148,6 +152,7 @@
}
P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+ RESTORE_STACK;
}
/* Prefilter for finding Quantizer input signal */
--- a/silk/fixed/residual_energy_FIX.c
+++ b/silk/fixed/residual_energy_FIX.c
@@ -30,6 +30,7 @@
#endif
#include "main_FIX.h"
+#include "stack_alloc.h"
/* Calculates residual energies of input subframes where all subframes have LPC_order */
/* of preceding samples */
@@ -45,14 +46,18 @@
)
{
opus_int offset, i, j, rshift, lz1, lz2;
- opus_int16 *LPC_res_ptr, LPC_res[ ( MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ) / 2 ];
+ opus_int16 *LPC_res_ptr;
+ VARDECL( opus_int16, LPC_res );
const opus_int16 *x_ptr;
opus_int32 tmp32;
+ SAVE_STACK;
x_ptr = x;
offset = LPC_order + subfr_length;
/* Filter input to create the LPC residual for each frame half, and measure subframe energies */
+ ALLOC( LPC_res, ( MAX_NB_SUBFR >> 1 ) * offset, opus_int16 );
+ silk_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr );
for( i = 0; i < nb_subfr >> 1; i++ ) {
/* Calculate half frame LPC residual signal including preceding samples */
silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order );
@@ -88,4 +93,5 @@
nrgs[ i ] = silk_SMMUL( tmp32, silk_LSHIFT32( nrgs[ i ], lz1 ) ); /* Q( nrgsQ[ i ] + lz1 + 2 * lz2 - 32 - 32 )*/
nrgsQ[ i ] += lz1 + 2 * lz2 - 32 - 32;
}
+ RESTORE_STACK;
}
--- a/silk/fixed/solve_LS_FIX.c
+++ b/silk/fixed/solve_LS_FIX.c
@@ -30,6 +30,7 @@
#endif
#include "main_FIX.h"
+#include "stack_alloc.h"
#include "tuning_parameters.h"
/*****************************/
@@ -79,11 +80,13 @@
opus_int32 *x_Q16 /* O Pointer to x solution vector */
)
{
- opus_int32 L_Q16[ MAX_MATRIX_SIZE * MAX_MATRIX_SIZE ];
+ VARDECL( opus_int32, L_Q16 );
opus_int32 Y[ MAX_MATRIX_SIZE ];
inv_D_t inv_D[ MAX_MATRIX_SIZE ];
+ SAVE_STACK;
silk_assert( M <= MAX_MATRIX_SIZE );
+ ALLOC( L_Q16, M * M, opus_int32 );
/***************************************************
Factorize A by LDL such that A = L*D*L',
@@ -107,6 +110,7 @@
x = inv(L') * inv(D) * Y
*****************************************************/
silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 );
+ RESTORE_STACK;
}
static inline void silk_LDL_factorize_FIX(
--- a/silk/macros.h
+++ b/silk/macros.h
@@ -123,12 +123,15 @@
}
/* Row based */
-#define matrix_ptr(Matrix_base_adr, row, column, N) *(Matrix_base_adr + ((row)*(N)+(column)))
-#define matrix_adr(Matrix_base_adr, row, column, N) (Matrix_base_adr + ((row)*(N)+(column)))
+#define matrix_ptr(Matrix_base_adr, row, column, N) \
+ (*((Matrix_base_adr) + ((row)*(N)+(column))))
+#define matrix_adr(Matrix_base_adr, row, column, N) \
+ ((Matrix_base_adr) + ((row)*(N)+(column)))
/* Column based */
#ifndef matrix_c_ptr
-# define matrix_c_ptr(Matrix_base_adr, row, column, M) *(Matrix_base_adr + ((row)+(M)*(column)))
+# define matrix_c_ptr(Matrix_base_adr, row, column, M) \
+ (*((Matrix_base_adr) + ((row)+(M)*(column))))
#endif
#endif /* SILK_MACROS_H */
--- a/silk/resampler_down2.c
+++ b/silk/resampler_down2.c
@@ -35,8 +35,8 @@
/* Downsample by a factor 2 */
void silk_resampler_down2(
opus_int32 *S, /* I/O State vector [ 2 ] */
- opus_int16 *out, /* O Output signal [ len ] */
- const opus_int16 *in, /* I Input signal [ floor(len/2) ] */
+ opus_int16 *out, /* O Output signal [ floor(len/2) ] */
+ const opus_int16 *in, /* I Input signal [ len ] */
opus_int32 inLen /* I Number of input samples */
)
{
--- a/silk/resampler_down2_3.c
+++ b/silk/resampler_down2_3.c
@@ -31,6 +31,7 @@
#include "SigProc_FIX.h"
#include "resampler_private.h"
+#include "stack_alloc.h"
#define ORDER_FIR 4
@@ -43,9 +44,12 @@
)
{
opus_int32 nSamplesIn, counter, res_Q6;
- opus_int32 buf[ RESAMPLER_MAX_BATCH_SIZE_IN + ORDER_FIR ];
+ VARDECL( opus_int32, buf );
opus_int32 *buf_ptr;
+ SAVE_STACK;
+ ALLOC( buf, RESAMPLER_MAX_BATCH_SIZE_IN + ORDER_FIR, opus_int32 );
+
/* Copy buffered samples to start of buffer */
silk_memcpy( buf, S, ORDER_FIR * sizeof( opus_int32 ) );
@@ -95,4 +99,5 @@
/* Copy last part of filtered signal to the state for the next call */
silk_memcpy( S, &buf[ nSamplesIn ], ORDER_FIR * sizeof( opus_int32 ) );
+ RESTORE_STACK;
}
--- a/silk/resampler_private_down_FIR.c
+++ b/silk/resampler_private_down_FIR.c
@@ -31,6 +31,7 @@
#include "SigProc_FIX.h"
#include "resampler_private.h"
+#include "stack_alloc.h"
static inline opus_int16 *silk_resampler_private_down_FIR_INTERPOL(
opus_int16 *out,
@@ -151,9 +152,12 @@
silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS;
opus_int32 nSamplesIn;
opus_int32 max_index_Q16, index_increment_Q16;
- opus_int32 buf[ RESAMPLER_MAX_BATCH_SIZE_IN + SILK_RESAMPLER_MAX_FIR_ORDER ];
+ VARDECL( opus_int32, buf );
const opus_int16 *FIR_Coefs;
+ SAVE_STACK;
+ ALLOC( buf, S->batchSize + S->FIR_Order, opus_int32 );
+
/* Copy buffered samples to start of buffer */
silk_memcpy( buf, S->sFIR.i32, S->FIR_Order * sizeof( opus_int32 ) );
@@ -186,4 +190,5 @@
/* Copy last part of filtered signal to the state for the next call */
silk_memcpy( S->sFIR.i32, &buf[ nSamplesIn ], S->FIR_Order * sizeof( opus_int32 ) );
+ RESTORE_STACK;
}
--- a/silk/stereo_LR_to_MS.c
+++ b/silk/stereo_LR_to_MS.c
@@ -30,6 +30,7 @@
#endif
#include "main.h"
+#include "stack_alloc.h"
/* Convert Left/Right stereo signal to adaptive Mid/Side representation */
void silk_stereo_LR_to_MS(
@@ -49,11 +50,15 @@
opus_int n, is10msFrame, denom_Q16, delta0_Q13, delta1_Q13;
opus_int32 sum, diff, smooth_coef_Q16, pred_Q13[ 2 ], pred0_Q13, pred1_Q13;
opus_int32 LP_ratio_Q14, HP_ratio_Q14, frac_Q16, frac_3_Q16, min_mid_rate_bps, width_Q14, w_Q24, deltaw_Q24;
- opus_int16 side[ MAX_FRAME_LENGTH + 2 ];
- opus_int16 LP_mid[ MAX_FRAME_LENGTH ], HP_mid[ MAX_FRAME_LENGTH ];
- opus_int16 LP_side[ MAX_FRAME_LENGTH ], HP_side[ MAX_FRAME_LENGTH ];
+ VARDECL( opus_int16, side );
+ VARDECL( opus_int16, LP_mid );
+ VARDECL( opus_int16, HP_mid );
+ VARDECL( opus_int16, LP_side );
+ VARDECL( opus_int16, HP_side );
opus_int16 *mid = &x1[ -2 ];
+ SAVE_STACK;
+ ALLOC( side, frame_length + 2, opus_int16 );
/* Convert to basic mid/side signals */
for( n = 0; n < frame_length + 2; n++ ) {
sum = x1[ n - 2 ] + (opus_int32)x2[ n - 2 ];
@@ -69,6 +74,8 @@
silk_memcpy( state->sSide, &side[ frame_length ], 2 * sizeof( opus_int16 ) );
/* LP and HP filter mid signal */
+ ALLOC( LP_mid, frame_length, opus_int16 );
+ ALLOC( HP_mid, frame_length, opus_int16 );
for( n = 0; n < frame_length; n++ ) {
sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );
LP_mid[ n ] = sum;
@@ -76,6 +83,8 @@
}
/* LP and HP filter side signal */
+ ALLOC( LP_side, frame_length, opus_int16 );
+ ALLOC( HP_side, frame_length, opus_int16 );
for( n = 0; n < frame_length; n++ ) {
sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + side[ n + 2 ], side[ n + 1 ], 1 ), 2 );
LP_side[ n ] = sum;
@@ -216,4 +225,5 @@
state->pred_prev_Q13[ 0 ] = (opus_int16)pred_Q13[ 0 ];
state->pred_prev_Q13[ 1 ] = (opus_int16)pred_Q13[ 1 ];
state->width_prev_Q14 = (opus_int16)width_Q14;
+ RESTORE_STACK;
}
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -1241,7 +1241,7 @@
VARDECL(unsigned char, tmp_data);
int nb_frames;
int bak_mode, bak_bandwidth, bak_channels, bak_to_mono;
- OpusRepacketizer rp;
+ VARDECL(OpusRepacketizer, rp);
opus_int32 bytes_per_frame;
@@ -1250,7 +1250,8 @@
ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char);
- opus_repacketizer_init(&rp);
+ ALLOC(rp, 1, OpusRepacketizer);
+ opus_repacketizer_init(rp);
bak_mode = st->user_forced_mode;
bak_bandwidth = st->user_bandwidth;
@@ -1282,7 +1283,7 @@
RESTORE_STACK;
return OPUS_INTERNAL_ERROR;
}
- ret = opus_repacketizer_cat(&rp, tmp_data+i*bytes_per_frame, tmp_len);
+ ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len);
if (ret<0)
{
RESTORE_STACK;
@@ -1289,7 +1290,7 @@
return OPUS_INTERNAL_ERROR;
}
}
- ret = opus_repacketizer_out(&rp, data, out_data_bytes);
+ ret = opus_repacketizer_out(rp, data, out_data_bytes);
if (ret<0)
{
RESTORE_STACK;