ref: 6f2d9f50680520142361f78b4cf60da068a8ef31
parent: e7f668b3d4711b365cfc9a2e219ee29b57c9abbc
author: Timothy B. Terriberry <[email protected]>
date: Wed Sep 5 03:35:49 EDT 2012
Use dynamic stack allocations in SILK decoder. This allows the decoder to be compiled with NONTHREADSAFE_PSEUDOSTACK, which moves the memory for large buffers off the stack on devices where stack space is very limited. This patch only attempts to do this for the decoder; the encoder still requires more than 10 kB of stack.
--- a/silk/PLC.c
+++ b/silk/PLC.c
@@ -30,6 +30,7 @@
#endif
#include "main.h"
+#include "stack_alloc.h"
#include "PLC.h"
#define NB_ATT 2
@@ -178,13 +179,18 @@
opus_int16 rand_scale_Q14;
opus_int16 *B_Q14, *exc_buf_ptr;
opus_int32 *sLPC_Q14_ptr;
- opus_int16 exc_buf[ 2 * MAX_SUB_FRAME_LENGTH ];
+ VARDECL( opus_int16, exc_buf );
opus_int16 A_Q12[ MAX_LPC_ORDER ];
- opus_int16 sLTP[ MAX_FRAME_LENGTH ];
- opus_int32 sLTP_Q14[ 2 * MAX_FRAME_LENGTH ];
+ VARDECL( opus_int16, sLTP );
+ VARDECL( opus_int32, sLTP_Q14 );
silk_PLC_struct *psPLC = &psDec->sPLC;
opus_int32 prevGain_Q10[2];
+ SAVE_STACK;
+ ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 );
+ ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
+ ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
+
prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6);
prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6);
@@ -354,6 +360,7 @@
for( i = 0; i < MAX_NB_SUBFR; i++ ) {
psDecCtrl->pitchL[ i ] = lag;
}
+ RESTORE_STACK;
}
/* Glues concealed frames with new good received frames */
--- a/silk/dec_API.c
+++ b/silk/dec_API.c
@@ -30,6 +30,7 @@
#endif
#include "API.h"
#include "main.h"
+#include "stack_alloc.h"
/************************/
/* Decoder Super Struct */
@@ -85,8 +86,9 @@
{
opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
opus_int32 nSamplesOutDec, LBRR_symbol;
- opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 ];
- opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ];
+ opus_int16 *samplesOut1_tmp[ 2 ];
+ VARDECL( opus_int16, samplesOut1_tmp_storage );
+ VARDECL( opus_int16, samplesOut2_tmp );
opus_int32 MS_pred_Q13[ 2 ] = { 0 };
opus_int16 *resample_out_ptr;
silk_decoder *psDec = ( silk_decoder * )decState;
@@ -93,6 +95,7 @@
silk_decoder_state *channel_state = psDec->channel_state;
opus_int has_side;
opus_int stereo_to_mono;
+ SAVE_STACK;
/**********************************/
/* Test if first frame in payload */
@@ -132,11 +135,13 @@
channel_state[ n ].nb_subfr = 4;
} else {
silk_assert( 0 );
+ RESTORE_STACK;
return SILK_DEC_INVALID_FRAME_SIZE;
}
fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
silk_assert( 0 );
+ RESTORE_STACK;
return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
}
ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
@@ -153,6 +158,7 @@
if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
+ RESTORE_STACK;
return( ret );
}
@@ -240,6 +246,14 @@
psDec->channel_state[ 1 ].first_frame_after_reset = 1;
}
+ ALLOC( samplesOut1_tmp_storage,
+ decControl->nChannelsInternal*(
+ channel_state[ 0 ].frame_length + 2 ),
+ opus_int16 );
+ samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
+ samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
+ + channel_state[ 0 ].frame_length + 2;
+
if( lostFlag == FLAG_DECODE_NORMAL ) {
has_side = !decode_only_middle;
} else {
@@ -285,6 +299,8 @@
*nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
/* Set up pointers to temp buffers */
+ ALLOC( samplesOut2_tmp,
+ decControl->nChannelsAPI == 2 ? *nSamplesOut : 0, opus_int16 );
if( decControl->nChannelsAPI == 2 ) {
resample_out_ptr = samplesOut2_tmp;
} else {
@@ -337,6 +353,7 @@
} else {
psDec->prev_decode_only_middle = decode_only_middle;
}
+ RESTORE_STACK;
return ret;
}
--- a/silk/decode_core.c
+++ b/silk/decode_core.c
@@ -30,6 +30,7 @@
#endif
#include "main.h"
+#include "stack_alloc.h"
/**********************************************************/
/* Core decoder. Performs inverse NSQ operation LTP + LPC */
@@ -43,15 +44,21 @@
{
opus_int i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType;
opus_int16 *A_Q12, *B_Q14, *pxq, A_Q12_tmp[ MAX_LPC_ORDER ];
- opus_int16 sLTP[ MAX_FRAME_LENGTH ];
- opus_int32 sLTP_Q15[ 2 * MAX_FRAME_LENGTH ];
+ VARDECL( opus_int16, sLTP );
+ VARDECL( opus_int32, sLTP_Q15 );
opus_int32 LTP_pred_Q13, LPC_pred_Q10, Gain_Q10, inv_gain_Q31, gain_adj_Q16, rand_seed, offset_Q10;
opus_int32 *pred_lag_ptr, *pexc_Q14, *pres_Q14;
- opus_int32 res_Q14[ MAX_SUB_FRAME_LENGTH ];
- opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ];
+ VARDECL( opus_int32, res_Q14 );
+ VARDECL( opus_int32, sLPC_Q14 );
+ SAVE_STACK;
silk_assert( psDec->prev_gain_Q16 != 0 );
+ ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
+ ALLOC( sLTP_Q15, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
+ ALLOC( res_Q14, psDec->subfr_length, opus_int32 );
+ ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 );
+
offset_Q10 = silk_Quantization_Offsets_Q10[ psDec->indices.signalType >> 1 ][ psDec->indices.quantOffsetType ];
if( psDec->indices.NLSFInterpCoef_Q2 < 1 << 2 ) {
@@ -227,4 +234,5 @@
/* Save LPC state */
silk_memcpy( psDec->sLPC_Q14_buf, sLPC_Q14, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+ RESTORE_STACK;
}
--- a/silk/decode_frame.c
+++ b/silk/decode_frame.c
@@ -30,6 +30,7 @@
#endif
#include "main.h"
+#include "stack_alloc.h"
#include "PLC.h"
/****************/
@@ -44,12 +45,16 @@
opus_int condCoding /* I The type of conditional coding to use */
)
{
- silk_decoder_control sDecCtrl;
+ VARDECL( silk_decoder_control, psDecCtrl );
opus_int L, mv_len, ret = 0;
- opus_int pulses[ MAX_FRAME_LENGTH ];
+ VARDECL( opus_int, pulses );
+ SAVE_STACK;
L = psDec->frame_length;
- sDecCtrl.LTP_scale_Q14 = 0;
+ ALLOC( psDecCtrl, 1, silk_decoder_control );
+ ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
+ ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int );
+ psDecCtrl->LTP_scale_Q14 = 0;
/* Safety checks */
silk_assert( L > 0 && L <= MAX_FRAME_LENGTH );
@@ -71,20 +76,17 @@
/********************************************/
/* Decode parameters and pulse signal */
/********************************************/
- silk_decode_parameters( psDec, &sDecCtrl, condCoding );
+ silk_decode_parameters( psDec, psDecCtrl, condCoding );
- /* Update length. Sampling frequency may have changed */
- L = psDec->frame_length;
-
/********************************************************/
/* Run inverse NSQ */
/********************************************************/
- silk_decode_core( psDec, &sDecCtrl, pOut, pulses );
+ silk_decode_core( psDec, psDecCtrl, pOut, pulses );
/********************************************************/
/* Update PLC state */
/********************************************************/
- silk_PLC( psDec, &sDecCtrl, pOut, 0 );
+ silk_PLC( psDec, psDecCtrl, pOut, 0 );
psDec->lossCnt = 0;
psDec->prevSignalType = psDec->indices.signalType;
@@ -94,7 +96,7 @@
psDec->first_frame_after_reset = 0;
} else {
/* Handle packet loss by extrapolation */
- silk_PLC( psDec, &sDecCtrl, pOut, 1 );
+ silk_PLC( psDec, psDecCtrl, pOut, 1 );
}
/*************************/
@@ -113,13 +115,14 @@
/************************************************/
/* Comfort noise generation / estimation */
/************************************************/
- silk_CNG( psDec, &sDecCtrl, pOut, L );
+ silk_CNG( psDec, psDecCtrl, pOut, L );
/* Update some decoder state variables */
- psDec->lagPrev = sDecCtrl.pitchL[ psDec->nb_subfr - 1 ];
+ psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ];
/* Set output frame length */
*pN = L;
+ RESTORE_STACK;
return ret;
}