shithub: opus

Download patch

ref: 3195f6cdb985b2818683a30bdd5298895694bd2d
parent: 53ea87fb96bc7eb550d1f60b3136e32857a4f9d9
author: Koen Vos <[email protected]>
date: Mon Oct 10 16:46:32 EDT 2011

Misc SILK fixes:

- fix a compile warning in opus_decoder.c
- decoder state reduced by ~3 kB (by merging buffers, as Tim suggested)
- some minor decoder optimizations (only the PLC is not bit-exact, so this should be OK)

--- a/silk/PLC.c
+++ b/silk/PLC.c
@@ -166,28 +166,23 @@
 )
 {
     opus_int   i, j, k;
-    opus_int16 *B_Q14, exc_buf[ MAX_FRAME_LENGTH ], *exc_buf_ptr;
-    opus_int16 rand_scale_Q14, A_Q12_tmp[ MAX_LPC_ORDER ];
-    opus_int32 rand_seed, harm_Gain_Q15, rand_Gain_Q15;
     opus_int   lag, idx, sLTP_buf_idx, shift1, shift2;
+    opus_int32 rand_seed, harm_Gain_Q15, rand_Gain_Q15, inv_gain_Q16, inv_gain_Q30;
     opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr;
-    opus_int32 sig_Q10[ MAX_FRAME_LENGTH ], *sig_Q10_ptr, LPC_exc_Q10, LPC_pred_Q10,  LTP_pred_Q14;
-    silk_PLC_struct *psPLC;
-    psPLC = &psDec->sPLC;
+    opus_int32 LPC_exc_Q14, LPC_pred_Q10, LTP_pred_Q12;
+    opus_int16 rand_scale_Q14;
+    opus_int16 *B_Q14, *exc_buf_ptr;
+    opus_int32 *sLPC_Q14_ptr;
+    opus_int16 exc_buf[ 2 * MAX_SUB_FRAME_LENGTH ];
+    opus_int16 A_Q12[ MAX_LPC_ORDER ];
+    opus_int16 sLTP[ MAX_FRAME_LENGTH ];
+    opus_int32 sLTP_Q14[ 2 * MAX_FRAME_LENGTH ];
+    silk_PLC_struct *psPLC = &psDec->sPLC;
 
-    /* Update LTP buffer */
-    silk_memmove( psDec->sLTP_Q16, &psDec->sLTP_Q16[ psDec->frame_length ], psDec->ltp_mem_length * sizeof( opus_int32 ) );
-
-    /* LPC concealment. Apply BWE to previous LPC */
-    silk_bwexpander( psPLC->prevLPC_Q12, psDec->LPC_order, SILK_FIX_CONST( BWE_COEF, 16 ) );
-
     /* Find random noise component */
     /* Scale previous excitation signal */
     exc_buf_ptr = exc_buf;
-    /* FIXME: JMV: Is this the right fix? */
-    for (i=0;i<MAX_FRAME_LENGTH;i++)
-        exc_buf[i] = 0;
-    for( k = ( psDec->nb_subfr >> 1 ); k < psDec->nb_subfr; k++ ) {
+    for( k = psDec->nb_subfr - 2; k < psDec->nb_subfr; k++ ) {
         for( i = 0; i < psDec->subfr_length; i++ ) {
             exc_buf_ptr[ i ] = ( opus_int16 )silk_RSHIFT(
                 silk_SMULWW( psDec->exc_Q10[ i + k * psDec->subfr_length ], psPLC->prevGain_Q16[ k ] ), 10 );
@@ -200,7 +195,7 @@
 
     if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) {
         /* First sub-frame has lowest energy */
-        rand_ptr = &psDec->exc_Q10[ silk_max_int( 0, 3 * psDec->subfr_length - RAND_BUF_SIZE ) ];
+        rand_ptr = &psDec->exc_Q10[ silk_max_int( 0, psDec->frame_length - psDec->subfr_length - RAND_BUF_SIZE ) ];
     } else {
         /* Second sub-frame has lowest energy */
         rand_ptr = &psDec->exc_Q10[ silk_max_int( 0, psDec->frame_length - RAND_BUF_SIZE ) ];
@@ -218,6 +213,12 @@
         rand_Gain_Q15 = PLC_RAND_ATTENUATE_UV_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ];
     }
 
+    /* LPC concealment. Apply BWE to previous LPC */
+    silk_bwexpander( psPLC->prevLPC_Q12, psDec->LPC_order, SILK_FIX_CONST( BWE_COEF, 16 ) );
+
+    /* Preload LPC coefficients to array on stack. Gives small performance gain */
+    silk_memcpy( A_Q12, psPLC->prevLPC_Q12, psDec->LPC_order * sizeof( opus_int16 ) );
+
     /* First Lost frame */
     if( psDec->lossCnt == 0 ) {
         rand_scale_Q14 = 1 << 14;
@@ -247,37 +248,42 @@
     lag          = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 );
     sLTP_buf_idx = psDec->ltp_mem_length;
 
+    /* Rewhiten LTP state */
+    idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2;
+    silk_assert( idx > 0 );
+    silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order );
+    /* Scale LTP state */
+    inv_gain_Q16 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ psDec->nb_subfr - 1 ], 32 );
+    inv_gain_Q16 = silk_min( inv_gain_Q16, silk_int16_MAX );
+    inv_gain_Q30 = silk_LSHIFT( inv_gain_Q16, 14 );
+    for( i = idx + psDec->LPC_order; i < psDec->ltp_mem_length; i++ ) {
+        sLTP_Q14[ i ] = silk_SMULWB( inv_gain_Q30, sLTP[ i ] );
+    }
+
     /***************************/
     /* LTP synthesis filtering */
     /***************************/
-    sig_Q10_ptr = sig_Q10;
     for( k = 0; k < psDec->nb_subfr; k++ ) {
         /* Setup pointer */
-        pred_lag_ptr = &psDec->sLTP_Q16[ sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+        pred_lag_ptr = &sLTP_Q14[ sLTP_buf_idx - lag + LTP_ORDER / 2 ];
         for( i = 0; i < psDec->subfr_length; i++ ) {
-            rand_seed = silk_RAND( rand_seed );
-            idx = silk_RSHIFT( rand_seed, 25 ) & RAND_BUF_MASK;
-
             /* Unrolled loop */
-            LTP_pred_Q14 = silk_SMULWB(               pred_lag_ptr[  0 ], B_Q14[ 0 ] );
-            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -1 ], B_Q14[ 1 ] );
-            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -2 ], B_Q14[ 2 ] );
-            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -3 ], B_Q14[ 3 ] );
-            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], B_Q14[ 4 ] );
+            LTP_pred_Q12 = silk_SMULWB(               pred_lag_ptr[  0 ], B_Q14[ 0 ] );
+            LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -1 ], B_Q14[ 1 ] );
+            LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -2 ], B_Q14[ 2 ] );
+            LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -3 ], B_Q14[ 3 ] );
+            LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -4 ], B_Q14[ 4 ] );
             pred_lag_ptr++;
 
-            /* Generate LPC residual */
-            LPC_exc_Q10 = silk_LSHIFT( silk_SMULWB( rand_ptr[ idx ], rand_scale_Q14 ), 2 ); /* Random noise part */
-            LPC_exc_Q10 = silk_ADD32( LPC_exc_Q10, silk_RSHIFT_ROUND( LTP_pred_Q14, 4 ) );  /* Harmonic part */
-
-            /* Update states */
-            psDec->sLTP_Q16[ sLTP_buf_idx ] = silk_LSHIFT( LPC_exc_Q10, 6 );
+            /* Generate LPC excitation */
+            rand_seed = silk_RAND( rand_seed );
+            idx = silk_RSHIFT( rand_seed, 25 ) & RAND_BUF_MASK;
+            LPC_exc_Q14 = silk_LSHIFT32( silk_SMULWB( rand_ptr[ idx ], rand_scale_Q14 ), 6 ); /* Random noise part */
+            LPC_exc_Q14 = silk_ADD32( LPC_exc_Q14, silk_LSHIFT32( LTP_pred_Q12, 2 ) );        /* Harmonic part */
+            sLTP_Q14[ sLTP_buf_idx ] = LPC_exc_Q14;
             sLTP_buf_idx++;
-
-            /* Save LPC residual */
-            sig_Q10_ptr[ i ] = LPC_exc_Q10;
         }
-        sig_Q10_ptr += psDec->subfr_length;
+
         /* Gradually reduce LTP gain */
         for( j = 0; j < LTP_ORDER; j++ ) {
             B_Q14[ j ] = silk_RSHIFT( silk_SMULBB( harm_Gain_Q15, B_Q14[ j ] ), 15 );
@@ -286,7 +292,7 @@
         rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 );
 
         /* Slowly increase pitch lag */
-        psPLC->pitchL_Q8 += silk_SMULWB( psPLC->pitchL_Q8, PITCH_DRIFT_FAC_Q16 );
+        psPLC->pitchL_Q8 = silk_SMLAWB( psPLC->pitchL_Q8, psPLC->pitchL_Q8, PITCH_DRIFT_FAC_Q16 );
         psPLC->pitchL_Q8 = silk_min_32( psPLC->pitchL_Q8, silk_LSHIFT( silk_SMULBB( MAX_PITCH_LAG_MS, psDec->fs_kHz ), 8 ) );
         lag = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 );
     }
@@ -294,43 +300,37 @@
     /***************************/
     /* LPC synthesis filtering */
     /***************************/
-    sig_Q10_ptr = sig_Q10;
-    /* Preload LPC coeficients to array on stack. Gives small performance gain */
-    silk_memcpy( A_Q12_tmp, psPLC->prevLPC_Q12, psDec->LPC_order * sizeof( opus_int16 ) );
-    silk_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */
-    for( k = 0; k < psDec->nb_subfr; k++ ) {
-        for( i = 0; i < psDec->subfr_length; i++ ){
-            /* partly unrolled */
-            LPC_pred_Q10 = silk_SMULWB(               psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  1 ], A_Q12_tmp[ 0 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  2 ], A_Q12_tmp[ 1 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  3 ], A_Q12_tmp[ 2 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  4 ], A_Q12_tmp[ 3 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  5 ], A_Q12_tmp[ 4 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  6 ], A_Q12_tmp[ 5 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  7 ], A_Q12_tmp[ 6 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  8 ], A_Q12_tmp[ 7 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  9 ], A_Q12_tmp[ 8 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12_tmp[ 9 ] );
+    sLPC_Q14_ptr = &sLTP_Q14[ psDec->ltp_mem_length - MAX_LPC_ORDER ];
 
-            for( j = 10; j < psDec->LPC_order; j++ ) {
-                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i - j - 1 ], A_Q12_tmp[ j ] );
-            }
+    /* Copy LPC state */
+    silk_memcpy( sLPC_Q14_ptr, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) );
 
-            /* Add prediction to LPC residual */
-            sig_Q10_ptr[ i ] = silk_ADD32( sig_Q10_ptr[ i ], LPC_pred_Q10 );
-
-            /* Update states */
-            psDec->sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_LSHIFT( sig_Q10_ptr[ i ], 4 );
+    silk_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */
+    for( i = 0; i < psDec->frame_length; i++ ) {
+        /* partly unrolled */
+        LPC_pred_Q10 = silk_SMULWB(               sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  1 ], A_Q12[ 0 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  2 ], A_Q12[ 1 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  3 ], A_Q12[ 2 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  4 ], A_Q12[ 3 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  5 ], A_Q12[ 4 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  6 ], A_Q12[ 5 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  7 ], A_Q12[ 6 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  8 ], A_Q12[ 7 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  9 ], A_Q12[ 8 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
+        for( j = 10; j < psDec->LPC_order; j++ ) {
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - j - 1 ], A_Q12[ j ] );
         }
-        sig_Q10_ptr += psDec->subfr_length;
-        /* Update LPC filter state */
-        silk_memcpy( psDec->sLPC_Q14, &psDec->sLPC_Q14[ psDec->subfr_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
-    }
 
-    /* Scale with Gain */
-    for( i = 0; i < psDec->frame_length; i++ ) {
-        frame[ i ] = ( opus_int16 )silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sig_Q10[ i ], psPLC->prevGain_Q16[ psDec->nb_subfr - 1 ] ), 10 ) );
+        /* Add prediction to LPC excitation */
+        sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
+
+        /* Scale with Gain */
+        frame[ i ] = ( opus_int16 )silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], psPLC->prevGain_Q16[ psDec->nb_subfr - 1 ] ), 14 ) );
     }
+
+    /* Save LPC state */
+    silk_memcpy( psDec->sLPC_Q14_buf, &sLPC_Q14_ptr[ psDec->frame_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
 
     /**************************************/
     /* Update states                      */
--- a/silk/dec_API.c
+++ b/silk/dec_API.c
@@ -227,7 +227,7 @@
             ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
         {
             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
-            /* For LBRR data, only decode mid-only flag if side-channel's LBRR flag is false */
+            /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
             if(   lostFlag == FLAG_DECODE_NORMAL ||
                 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
             {
--- a/silk/decode_core.c
+++ b/silk/decode_core.c
@@ -44,10 +44,11 @@
     opus_int   i, j, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType;
     opus_int16 *A_Q12, *B_Q14, *pxq, A_Q12_tmp[ MAX_LPC_ORDER ];
     opus_int16 sLTP[ MAX_FRAME_LENGTH ];
+    opus_int32 sLTP_Q16[ 2 * MAX_FRAME_LENGTH ];
     opus_int32 LTP_pred_Q14, LPC_pred_Q10, Gain_Q10, inv_gain_Q16, inv_gain_Q32, gain_adj_Q16, rand_seed, offset_Q10;
     opus_int32 *pred_lag_ptr, *pexc_Q10, *pres_Q10;
     opus_int32 res_Q10[ MAX_SUB_FRAME_LENGTH ];
-    opus_int32 vec_Q10[ MAX_SUB_FRAME_LENGTH ];
+    opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ];
 
     silk_assert( psDec->prev_inv_gain_Q16 != 0 );
 
@@ -80,8 +81,11 @@
     DEBUG_STORE_DATA( dec_q.dat, pulses, psDec->frame_length * sizeof( opus_int ) );
 #endif
 
+    /* Copy LPC state */
+    silk_memcpy( sLPC_Q14, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+
     pexc_Q10 = psDec->exc_Q10;
-    pxq      = &psDec->outBuf[ psDec->ltp_mem_length ];
+    pxq      = xq;
     sLTP_buf_idx = psDec->ltp_mem_length;
     /* Loop over subframes */
     for( k = 0; k < psDec->nb_subfr; k++ ) {
@@ -104,7 +108,7 @@
 
             /* Scale short term state */
             for( i = 0; i < MAX_LPC_ORDER; i++ ) {
-                psDec->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDec->sLPC_Q14[ i ] );
+                sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, sLPC_Q14[ i ] );
             }
         }
 
@@ -128,11 +132,15 @@
             lag = psDecCtrl->pitchL[ k ];
 
             /* Re-whitening */
-            if( ( k & ( 3 - silk_LSHIFT( NLSF_interpolation_flag, 1 ) ) ) == 0 ) {
+            if( k == 0 || ( k == 2 && NLSF_interpolation_flag ) ) {
                 /* Rewhiten with new A coefs */
                 start_idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2;
                 silk_assert( start_idx > 0 );
 
+                if( k == 2 ) {
+                    silk_memcpy( &psDec->outBuf[ psDec->ltp_mem_length ], xq, 2 * psDec->subfr_length * sizeof( opus_int16 ) );
+                }
+
                 silk_LPC_analysis_filter( &sLTP[ start_idx ], &psDec->outBuf[ start_idx + k * psDec->subfr_length ],
                     A_Q12, psDec->ltp_mem_length - start_idx, psDec->LPC_order );
 
@@ -139,17 +147,17 @@
                 /* After rewhitening the LTP state is unscaled */
                 inv_gain_Q32 = silk_LSHIFT( inv_gain_Q16, 16 );
                 if( k == 0 ) {
-                    /* Do LTP downscaling */
+                    /* Do LTP downscaling to reduce inter-packet dependency */
                     inv_gain_Q32 = silk_LSHIFT( silk_SMULWB( inv_gain_Q32, psDecCtrl->LTP_scale_Q14 ), 2 );
                 }
                 for( i = 0; i < lag + LTP_ORDER/2; i++ ) {
-                    psDec->sLTP_Q16[ sLTP_buf_idx - i - 1 ] = silk_SMULWB( inv_gain_Q32, sLTP[ psDec->ltp_mem_length - i - 1 ] );
+                    sLTP_Q16[ sLTP_buf_idx - i - 1 ] = silk_SMULWB( inv_gain_Q32, sLTP[ psDec->ltp_mem_length - i - 1 ] );
                 }
             } else {
                 /* Update LTP state when Gain changes */
                 if( gain_adj_Q16 != 1 << 16 ) {
                     for( i = 0; i < lag + LTP_ORDER/2; i++ ) {
-                        psDec->sLTP_Q16[ sLTP_buf_idx - i - 1 ] = silk_SMULWW( gain_adj_Q16, psDec->sLTP_Q16[ sLTP_buf_idx - i - 1 ] );
+                        sLTP_Q16[ sLTP_buf_idx - i - 1 ] = silk_SMULWW( gain_adj_Q16, sLTP_Q16[ sLTP_buf_idx - i - 1 ] );
                     }
                 }
             }
@@ -158,7 +166,7 @@
         /* Long-term prediction */
         if( signalType == TYPE_VOICED ) {
             /* Setup pointer */
-            pred_lag_ptr = &psDec->sLTP_Q16[ sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+            pred_lag_ptr = &sLTP_Q16[ sLTP_buf_idx - lag + LTP_ORDER / 2 ];
             for( i = 0; i < psDec->subfr_length; i++ ) {
                 /* Unrolled loop */
                 LTP_pred_Q14 = silk_SMULWB(               pred_lag_ptr[  0 ], B_Q14[ 0 ] );
@@ -172,7 +180,7 @@
                 pres_Q10[ i ] = silk_ADD32( pexc_Q10[ i ], silk_RSHIFT_ROUND( LTP_pred_Q14, 4 ) );
 
                 /* Update states */
-                psDec->sLTP_Q16[ sLTP_buf_idx ] = silk_LSHIFT( pres_Q10[ i ], 6 );
+                sLTP_Q16[ sLTP_buf_idx ] = silk_LSHIFT( pres_Q10[ i ], 6 );
                 sLTP_buf_idx++;
             }
         } else {
@@ -186,43 +194,38 @@
 
         for( i = 0; i < psDec->subfr_length; i++ ) {
             /* Partially unrolled */
-            LPC_pred_Q10 = silk_SMULWB(               psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  1 ], A_Q12_tmp[ 0 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  2 ], A_Q12_tmp[ 1 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  3 ], A_Q12_tmp[ 2 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  4 ], A_Q12_tmp[ 3 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  5 ], A_Q12_tmp[ 4 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  6 ], A_Q12_tmp[ 5 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  7 ], A_Q12_tmp[ 6 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  8 ], A_Q12_tmp[ 7 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i -  9 ], A_Q12_tmp[ 8 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12_tmp[ 9 ] );
+            LPC_pred_Q10 = silk_SMULWB(               sLPC_Q14[ MAX_LPC_ORDER + i -  1 ], A_Q12_tmp[ 0 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  2 ], A_Q12_tmp[ 1 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  3 ], A_Q12_tmp[ 2 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  4 ], A_Q12_tmp[ 3 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  5 ], A_Q12_tmp[ 4 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  6 ], A_Q12_tmp[ 5 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  7 ], A_Q12_tmp[ 6 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  8 ], A_Q12_tmp[ 7 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  9 ], A_Q12_tmp[ 8 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12_tmp[ 9 ] );
             for( j = 10; j < psDec->LPC_order; j++ ) {
-                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psDec->sLPC_Q14[ MAX_LPC_ORDER + i - j - 1 ], A_Q12_tmp[ j ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - j - 1 ], A_Q12_tmp[ j ] );
             }
 
             /* Add prediction to LPC excitation */
-            vec_Q10[ i ] = silk_ADD32( pres_Q10[ i ], LPC_pred_Q10 );
+            sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_LSHIFT( silk_ADD32( pres_Q10[ i ], LPC_pred_Q10 ), 4 );
 
-            /* Update states */
-            psDec->sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_LSHIFT( vec_Q10[ i ], 4 );
+            /* Scale with Gain */
+            pxq[ i ] = ( opus_int16 )silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) );
         }
 
-        /* Scale with Gain */
-        for( i = 0; i < psDec->subfr_length; i++ ) {
-            pxq[ i ] = ( opus_int16 )silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( vec_Q10[ i ], Gain_Q10 ), 4 ) );
-        }
-
         /* Update LPC filter state */
-        silk_memcpy( psDec->sLPC_Q14, &psDec->sLPC_Q14[ psDec->subfr_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
+        silk_memcpy( sLPC_Q14, &sLPC_Q14[ psDec->subfr_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
         pexc_Q10 += psDec->subfr_length;
         pxq      += psDec->subfr_length;
     }
 
-    /* Copy to output */
-    silk_memcpy( xq, &psDec->outBuf[ psDec->ltp_mem_length ], psDec->frame_length * sizeof( opus_int16 ) );
+    /* Save LPC state */
+    silk_memcpy( psDec->sLPC_Q14_buf, sLPC_Q14, MAX_LPC_ORDER * sizeof( opus_int32 ) );
 
 #ifdef SAVE_ALL_INTERNAL_DATA
-    DEBUG_STORE_DATA( dec_sLTP_Q16.dat, &psDec->sLTP_Q16[ psDec->ltp_mem_length ], psDec->frame_length * sizeof( opus_int32 ));
+    DEBUG_STORE_DATA( dec_sLTP_Q16.dat, &sLTP_Q16[ psDec->ltp_mem_length ], psDec->frame_length * sizeof( opus_int32 ));
     DEBUG_STORE_DATA( dec_xq.dat, xq, psDec->frame_length * sizeof( opus_int16 ) );
 #endif
 }
--- a/silk/decoder_set_fs.c
+++ b/silk/decoder_set_fs.c
@@ -72,7 +72,7 @@
         if( psDec->fs_kHz != fs_kHz)
         {
            /* Reset part of the decoder state */
-           silk_memset( psDec->sLPC_Q14,     0,                    sizeof( psDec->sLPC_Q14 ) );
+           silk_memset( psDec->sLPC_Q14_buf, 0,                    sizeof( psDec->sLPC_Q14_buf ) );
            silk_memset( psDec->outBuf,       0, MAX_FRAME_LENGTH * sizeof( opus_int16 ) );
            silk_memset( psDec->prevNLSF_Q15, 0,                    sizeof( psDec->prevNLSF_Q15 ) );
         }
--- a/silk/structs.h
+++ b/silk/structs.h
@@ -255,10 +255,9 @@
 /********************************/
 typedef struct {
     opus_int32       prev_inv_gain_Q16;
-    opus_int32       sLTP_Q16[ 2 * MAX_FRAME_LENGTH ];
-    opus_int32       sLPC_Q14[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ];
     opus_int32       exc_Q10[ MAX_FRAME_LENGTH ];
-    opus_int16       outBuf[ 2 * MAX_FRAME_LENGTH ];             /* Buffer for output signal                                             */
+    opus_int32       sLPC_Q14_buf[ MAX_LPC_ORDER ];
+    opus_int16       outBuf[ MAX_FRAME_LENGTH + 2 * MAX_SUB_FRAME_LENGTH ];  /* Buffer for output signal                                 */
     opus_int16       delayBuf[ MAX_DECODER_DELAY ];              /* Buffer for delaying the SILK output prior to resampling              */
     opus_int         delay;                                      /* How much decoder delay to add                                        */
     opus_int         lagPrev;                                    /* Previous Lag                                                         */
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -418,7 +418,7 @@
             pcm[i] = SAT16(pcm[i] + pcm_silk[i]);
 #else
         for (i=0;i<frame_size*st->channels;i++)
-            pcm[i] = pcm[i] + (1./32768.)*pcm_silk[i];
+            pcm[i] = pcm[i] + (opus_val16)((1./32768.)*pcm_silk[i]);
 #endif
     }