shithub: opus

--- a/silk/control.h

+++ b/silk/control.h

@@ -117,6 +117,9 @@

     /* I:   Number of samples per packet in milliseconds; 10/20/40/60                       */

     opus_int payloadSize_ms;

+    /* O:   Pitch lag of previous frame (0 if unvoiced), measured in samples at 48 kHz      */

+    opus_int prevPitchLag;

 } silk_DecControlStruct;

 #ifdef __cplusplus

--- a/silk/control_codec.c

+++ b/silk/control_codec.c

@@ -153,6 +153,7 @@

         } else {

             /* Allocate worst case space for temporary upsampling, 8 to 48 kHz, so a factor 6 */

             opus_int16 x_buf_API_fs_Hz[ ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * MAX_API_FS_KHZ ];

+            silk_resampler_state_struct  temp_resampler_state;

 #ifdef FIXED_POINT

             opus_int16 *x_bufFIX = psEnc->x_buf;

 #else

@@ -165,32 +166,21 @@

             silk_float2short_array( x_bufFIX, psEnc->x_buf, nSamples_temp );

 #endif

-            if( silk_SMULBB( fs_kHz, 1000 ) < psEnc->sCmn.API_fs_Hz && psEnc->sCmn.fs_kHz != 0 ) {

-                /* Resample buffered data in x_buf to API_fs_Hz */

+            /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */

+            ret += silk_resampler_init( &temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz );

-                silk_resampler_state_struct  temp_resampler_state;

+            /* Temporary resampling of x_buf data to API_fs_Hz */

+            ret += silk_resampler( &temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, nSamples_temp );

-                /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */

-                ret += silk_resampler_init( &temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz );

+            /* Calculate number of samples that have been temporarily upsampled */

+            nSamples_temp = silk_DIV32_16( nSamples_temp * psEnc->sCmn.API_fs_Hz, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ) );

-                /* Temporary resampling of x_buf data to API_fs_Hz */

-                ret += silk_resampler( &temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, nSamples_temp );

+            /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */

+            ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ) );

-                /* Calculate number of samples that has been temporarily upsampled */

-                nSamples_temp = silk_DIV32_16( nSamples_temp * psEnc->sCmn.API_fs_Hz, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ) );

+            /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */

+            ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, nSamples_temp );

-                /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */

-                ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ) );

-            } else {

-                /* Copy data */

-                silk_memcpy( x_buf_API_fs_Hz, x_bufFIX, nSamples_temp * sizeof( opus_int16 ) );

-            }

-            if( 1000 * fs_kHz != psEnc->sCmn.API_fs_Hz ) {

-                /* Correct resampler state (unless resampling by a factor 1) by resampling buffered data from API_fs_Hz to fs_kHz */

-                ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, nSamples_temp );

-            }

 #ifndef FIXED_POINT

             silk_short2float_array( psEnc->x_buf, x_bufFIX, ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * fs_kHz );

 #endif

@@ -251,14 +241,9 @@

     silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 );

     if( psEnc->sCmn.fs_kHz != fs_kHz ) {

         /* reset part of the state */

-#ifdef FIXED_POINT

-        silk_memset( &psEnc->sShape,               0, sizeof( silk_shape_state_FIX ) );

-        silk_memset( &psEnc->sPrefilt,             0, sizeof( silk_prefilter_state_FIX ) );

-#else

-        silk_memset( &psEnc->sShape,               0, sizeof( silk_shape_state_FLP ) );

-        silk_memset( &psEnc->sPrefilt,             0, sizeof( silk_prefilter_state_FLP ) );

-#endif

-        silk_memset( &psEnc->sCmn.sNSQ,            0, sizeof( silk_nsq_state ) );

+        silk_memset( &psEnc->sShape,               0, sizeof( psEnc->sShape ) );

+        silk_memset( &psEnc->sPrefilt,             0, sizeof( psEnc->sPrefilt ) );

+        silk_memset( &psEnc->sCmn.sNSQ,            0, sizeof( psEnc->sCmn.sNSQ ) );

         silk_memset( psEnc->sCmn.prev_NLSFq_Q15,   0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );

         silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) );

         psEnc->sCmn.inputBufIx                  = 0;

@@ -272,6 +257,7 @@

         psEnc->sShape.LastGainIndex             = 10;

         psEnc->sCmn.sNSQ.lagPrev                = 100;

         psEnc->sCmn.sNSQ.prev_inv_gain_Q16      = 65536;

+        psEnc->sCmn.prevSignalType              = TYPE_NO_VOICE_ACTIVITY;

         psEnc->sCmn.fs_kHz = fs_kHz;

         if( psEnc->sCmn.fs_kHz == 8 ) {

--- a/silk/create_init_destroy.c

+++ b/silk/create_init_destroy.c

@@ -42,7 +42,7 @@

     /* Clear the entire encoder state, except anything copied */

     silk_memset( psDec, 0, sizeof( silk_decoder_state ) );

-    /* Used to deactivate e.g. LSF interpolation and fluctuation reduction */

+    /* Used to deactivate LSF interpolation */

     psDec->first_frame_after_reset = 1;

     psDec->prev_inv_gain_Q16 = 65536;

--- a/silk/dec_API.c

+++ b/silk/dec_API.c

@@ -31,14 +31,6 @@

 #include "API.h"

 #include "main.h"

-static const int dec_delay_matrix[3][5] = {

-/*SILK API 8  12  16  24  48 */

-/* 8 */   {3, 0, 2, 0, 0},

-/*12 */   {0, 8, 5, 7, 5},

-/*16 */   {0, 0, 8, 5, 5}

-};

 /************************/

 /* Decoder Super Struct */

 /************************/

@@ -47,6 +39,7 @@

     stereo_dec_state                sStereo;

     opus_int                         nChannelsAPI;

     opus_int                         nChannelsInternal;

+    opus_int                         prev_decode_only_middle;

 } silk_decoder;

 /*********************/

@@ -88,7 +81,7 @@

     opus_int32                           *nSamplesOut    /* O:   Number of samples decoded                       */

-    opus_int   i, n, prev_fs_kHz, decode_only_middle = 0, ret = SILK_NO_ERROR;

+    opus_int   i, n, delay, decode_only_middle = 0, ret = SILK_NO_ERROR;

     opus_int32 nSamplesOutDec, LBRR_symbol;

     opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 + MAX_DECODER_DELAY ];

     opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ];

@@ -96,10 +89,7 @@

     opus_int16 *resample_out_ptr;

     silk_decoder *psDec = ( silk_decoder * )decState;

     silk_decoder_state *channel_state = psDec->channel_state;

-    int delay;

-    delay = channel_state[ 0 ].delay;

     /**********************************/

     /* Test if first frame in payload */

     /**********************************/

@@ -109,16 +99,9 @@

-    /* Save previous sample frequency */

-    prev_fs_kHz = channel_state[ 0 ].fs_kHz;

     /* If Mono -> Stereo transition in bitstream: init state of second channel */

     if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {

         ret += silk_init_decoder( &channel_state[ 1 ] );

-        if( psDec->nChannelsAPI == 2 ) {

-            silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );

-            silk_memcpy( &channel_state[ 1 ].delayBuf, &channel_state[ 0 ].delayBuf, MAX_DECODER_DELAY*sizeof(opus_int16));

-        }

     for( n = 0; n < decControl->nChannelsInternal; n++ ) {

@@ -149,24 +132,17 @@

                 silk_assert( 0 );

                 return SILK_DEC_INVALID_SAMPLING_FREQUENCY;

-            silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec );

+            ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );

-    /* Initialize resampler when switching internal or external sampling frequency */

-    if( prev_fs_kHz != channel_state[ 0 ].fs_kHz || channel_state[ 0 ].prev_API_sampleRate != decControl->API_sampleRate ) {

-        channel_state[ 0 ].delay = dec_delay_matrix[rateID(silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ))][rateID(decControl->API_sampleRate)];

-        silk_assert(channel_state[ 0 ].delay <= MAX_DECODER_DELAY);

-        ret = silk_resampler_init( &channel_state[ 0 ].resampler_state, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ), decControl->API_sampleRate );

-        if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {

-            silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );

-            channel_state[ 1 ].delay = channel_state[ 0 ].delay;

-        }

-    }

-    channel_state[ 0 ].prev_API_sampleRate = decControl->API_sampleRate;

+    delay = channel_state[ 0 ].delay;

     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {

         silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );

         silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );

+        silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );

+        silk_memcpy( &channel_state[ 1 ].delayBuf, &channel_state[ 0 ].delayBuf, sizeof(channel_state[ 0 ].delayBuf));

     psDec->nChannelsAPI      = decControl->nChannelsAPI;

     psDec->nChannelsInternal = decControl->nChannelsInternal;

@@ -237,11 +213,20 @@

         } else {

             for( n = 0; n < 2; n++ ) {

-                MS_pred_Q13[n] = psDec->sStereo.pred_prev_Q13[n];

+                MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];

+    /* Reset side channel decoder prediction memory for first frame with side coding */

+    if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {

+        silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );

+        silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );

+        psDec->channel_state[ 1 ].lagPrev        = 100;

+        psDec->channel_state[ 1 ].LastGainIndex  = 10;

+        psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;

+    }

     /* Call decoder for one frame */

     for( n = 0; n < decControl->nChannelsInternal; n++ ) {

         if( n == 0 || decode_only_middle == 0 ) {

@@ -253,10 +238,10 @@

     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {

         /* Convert Mid/Side to Left/Right */

-        silk_stereo_MS_to_LR( &psDec->sStereo, &samplesOut1_tmp[ 0 ][delay], &samplesOut1_tmp[ 1 ][delay], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );

+        silk_stereo_MS_to_LR( &psDec->sStereo, &samplesOut1_tmp[ 0 ][ delay ], &samplesOut1_tmp[ 1 ][ delay ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );

     } else {

         /* Buffering */

-        silk_memcpy( &samplesOut1_tmp[ 0 ][delay], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );

+        silk_memcpy( &samplesOut1_tmp[ 0 ][ delay ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );

         silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec + delay ], 2 * sizeof( opus_int16 ) );

@@ -272,10 +257,10 @@

     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {

-        silk_memcpy(&samplesOut1_tmp[ n ][ 1 ], &channel_state[ n ].delayBuf[ MAX_DECODER_DELAY-delay ], delay*sizeof(opus_int16));

+        silk_memcpy(&samplesOut1_tmp[ n ][ 1 ], &channel_state[ n ].delayBuf[ MAX_DECODER_DELAY - delay ], delay * sizeof(opus_int16));

         /* Resample decoded signal to API_sampleRate */

         ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );

-        silk_memcpy(channel_state[ n ].delayBuf, &samplesOut1_tmp[ n ][ 1 + nSamplesOutDec + delay - MAX_DECODER_DELAY ], MAX_DECODER_DELAY*sizeof(opus_int16));

+        silk_memcpy(channel_state[ n ].delayBuf, &samplesOut1_tmp[ n ][ 1 + nSamplesOutDec + delay - MAX_DECODER_DELAY ], MAX_DECODER_DELAY * sizeof(opus_int16));

         /* Interleave if stereo output and stereo stream */

         if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {

@@ -291,6 +276,16 @@

             samplesOut[ 0 + 2 * i ] = samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];

+    /* Export pitch lag, measured at 48 kHz sampling rate */

+    if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {

+        int mult_tab[ 3 ] = { 6, 4, 3 };

+        decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];

+    } else {

+        decControl->prevPitchLag = 0;

+    }

+    psDec->prev_decode_only_middle = decode_only_middle;

     return ret;

--- a/silk/decoder_set_fs.c

+++ b/silk/decoder_set_fs.c

@@ -31,24 +31,59 @@

 #include "main.h"

+static const int dec_delay_matrix[3][5] = {

+/*SILK API 8  12  16  24  48 */

+/* 8 */   {3, 0, 2, 0, 0},

+/*12 */   {0, 8, 5, 7, 5},

+/*16 */   {0, 0, 8, 5, 5}

+};

 /* Set decoder sampling rate */

-void silk_decoder_set_fs(

+opus_int silk_decoder_set_fs(

     silk_decoder_state              *psDec,             /* I/O  Decoder state pointer                       */

-    opus_int                         fs_kHz              /* I    Sampling frequency (kHz)                    */

+    opus_int                         fs_kHz,             /* I    Sampling frequency (kHz)                    */

+    opus_int                         fs_API_Hz           /* I    API Sampling frequency (Hz)                 */

-    opus_int frame_length;

+    opus_int frame_length, ret = 0;

     silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );

     silk_assert( psDec->nb_subfr == MAX_NB_SUBFR || psDec->nb_subfr == MAX_NB_SUBFR/2 );

+    /* New (sub)frame length */

     psDec->subfr_length = silk_SMULBB( SUB_FRAME_LENGTH_MS, fs_kHz );

     frame_length = silk_SMULBB( psDec->nb_subfr, psDec->subfr_length );

+    /* Initialize resampler when switching internal or external sampling frequency */

+    if( psDec->fs_kHz != fs_kHz || psDec->fs_API_hz != fs_API_Hz ) {

+        /* Allocate worst case space for temporary upsampling, 8 to 48 kHz, so a factor 6 */

+        opus_int16 temp_buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ ];

+        silk_resampler_state_struct  temp_resampler_state;

+        /* New delay value */

+        psDec->delay = dec_delay_matrix[ rateID( silk_SMULBB( fs_kHz, 1000 ) ) ][ rateID( fs_API_Hz ) ];

+        silk_assert( psDec->delay <= MAX_DECODER_DELAY );

+        if( psDec->fs_kHz != fs_kHz && psDec->fs_kHz > 0 ) {

+            /* Initialize resampler for temporary resampling of outBuf data to the new internal sampling rate */

+            ret += silk_resampler_init( &temp_resampler_state, silk_SMULBB( psDec->fs_kHz, 1000 ), silk_SMULBB( fs_kHz, 1000 ) );

+            /* Temporary resampling of outBuf data to the new internal sampling rate */

+            silk_memcpy( temp_buf, psDec->outBuf, psDec->frame_length * sizeof( opus_int16 ) );

+            ret += silk_resampler( &temp_resampler_state, psDec->outBuf, temp_buf, psDec->frame_length );

+        }

+        /* Initialize the resampler for dec_API.c preparing resampling from fs_kHz to API_fs_Hz */

+        ret += silk_resampler_init( &psDec->resampler_state, silk_SMULBB( fs_kHz, 1000 ), fs_API_Hz );

+        /* Correct resampler state by resampling buffered data from fs_kHz to API_fs_Hz */

+        ret += silk_resampler( &psDec->resampler_state, temp_buf, psDec->outBuf, frame_length );

+        psDec->fs_API_hz = fs_API_Hz;

+    }

     if( psDec->fs_kHz != fs_kHz || frame_length != psDec->frame_length ) {

-        psDec->fs_kHz  = fs_kHz;

-        psDec->frame_length   = frame_length;

-        psDec->ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz );

-        if( psDec->fs_kHz == 8 ) {

+        if( fs_kHz == 8 ) {

             if( psDec->nb_subfr == MAX_NB_SUBFR ) {

                 psDec->pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;

             } else {

@@ -61,40 +96,38 @@

                 psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;

-        if( psDec->fs_kHz == 8 || psDec->fs_kHz == 12 ) {

-            psDec->LPC_order = MIN_LPC_ORDER;

-            psDec->psNLSF_CB = &silk_NLSF_CB_NB_MB;

-        } else {

-            psDec->LPC_order = MAX_LPC_ORDER;

-            psDec->psNLSF_CB = &silk_NLSF_CB_WB;

+        if( psDec->fs_kHz != fs_kHz ) {

+            psDec->ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz );

+            if( fs_kHz == 8 || fs_kHz == 12 ) {

+                psDec->LPC_order = MIN_LPC_ORDER;

+                psDec->psNLSF_CB = &silk_NLSF_CB_NB_MB;

+            } else {

+                psDec->LPC_order = MAX_LPC_ORDER;

+                psDec->psNLSF_CB = &silk_NLSF_CB_WB;

+            }

+            if( fs_kHz == 16 ) {

+                psDec->pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;

+            } else if( fs_kHz == 12 ) {

+                psDec->pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;

+            } else if( fs_kHz == 8 ) {

+                psDec->pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;

+            } else {

+                /* unsupported sampling rate */

+                silk_assert( 0 );

+            }

+            psDec->first_frame_after_reset = 1;

+            psDec->lagPrev                 = 100;

+            psDec->LastGainIndex           = 10;

+            psDec->prevSignalType          = TYPE_NO_VOICE_ACTIVITY;

-        if( psDec->fs_kHz != fs_kHz)

-        {

-           /* Reset part of the decoder state */

-           silk_memset( psDec->sLPC_Q14_buf, 0,                    sizeof( psDec->sLPC_Q14_buf ) );

-           silk_memset( psDec->outBuf,       0, MAX_FRAME_LENGTH * sizeof( opus_int16 ) );

-           silk_memset( psDec->prevNLSF_Q15, 0,                    sizeof( psDec->prevNLSF_Q15 ) );

-        }

-        psDec->lagPrev                 = 100;

-        psDec->LastGainIndex           = 10;

-        psDec->prevSignalType          = TYPE_NO_VOICE_ACTIVITY;

-        if( psDec->fs_kHz != fs_kHz)

-           psDec->first_frame_after_reset = 1;

-        if( fs_kHz == 16 ) {

-            psDec->pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;

-        } else if( fs_kHz == 12 ) {

-            psDec->pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;

-        } else if( fs_kHz == 8 ) {

-            psDec->pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;

-        } else {

-            /* unsupported sampling rate */

-            silk_assert( 0 );

-        }

+        psDec->fs_kHz       = fs_kHz;

+        psDec->frame_length = frame_length;

     /* Check that settings are valid */

     silk_assert( psDec->frame_length > 0 && psDec->frame_length <= MAX_FRAME_LENGTH );

+    return ret;

--- a/silk/define.h

+++ b/silk/define.h

@@ -214,12 +214,12 @@

 #define NLSF_QUANT_DEL_DEC_STATES               ( 1 << NLSF_QUANT_DEL_DEC_STATES_LOG2 )

 /* Transition filtering for mode switching */

-#  define TRANSITION_TIME_MS                    5120 /* 5120 = 64 * FRAME_LENGTH_MS * ( TRANSITION_INT_NUM - 1 ) = 64*(20*4)*/

-#  define TRANSITION_NB                         3 /* Hardcoded in tables */

-#  define TRANSITION_NA                         2 /* Hardcoded in tables */

-#  define TRANSITION_INT_NUM                    5 /* Hardcoded in tables */

-#  define TRANSITION_FRAMES                     ( TRANSITION_TIME_MS / MAX_FRAME_LENGTH_MS ) /* todo: needs to be made flexible for 10 ms frames*/

-#  define TRANSITION_INT_STEPS                  ( TRANSITION_FRAMES  / ( TRANSITION_INT_NUM - 1 ) )

+#define TRANSITION_TIME_MS                      5120 /* 5120 = 64 * FRAME_LENGTH_MS * ( TRANSITION_INT_NUM - 1 ) = 64*(20*4)*/

+#define TRANSITION_NB                           3 /* Hardcoded in tables */

+#define TRANSITION_NA                           2 /* Hardcoded in tables */

+#define TRANSITION_INT_NUM                      5 /* Hardcoded in tables */

+#define TRANSITION_FRAMES                       ( TRANSITION_TIME_MS / MAX_FRAME_LENGTH_MS )

+#define TRANSITION_INT_STEPS                    ( TRANSITION_FRAMES  / ( TRANSITION_INT_NUM - 1 ) )

 /* BWE factors to apply after packet loss */

 #define BWE_AFTER_LOSS_Q16                      63570

--- a/silk/enc_API.c

+++ b/silk/enc_API.c

@@ -237,13 +237,13 @@

             for( n = 0; n < nSamplesFromInput; n++ ) {

                 buf[ n+delay ] = samplesIn[ 2 * n ];

-            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));

+            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));

             /* Making sure to start both resamplers from the same state when switching from mono to stereo */

             if(psEnc->nPrevChannelsInternal == 1 && id==0) {

                silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));

                silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.delayBuf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf, MAX_ENCODER_DELAY*sizeof(opus_int16));

-            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));

+            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));

             ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,

                 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );

@@ -252,24 +252,24 @@

             nSamplesToBuffer  = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;

             nSamplesToBuffer  = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );

             for( n = 0; n < nSamplesFromInput; n++ ) {

-                buf[ n+delay ] = samplesIn[ 2 * n + 1 ];

+                buf[ n + delay ] = samplesIn[ 2 * n + 1 ];

-            silk_memcpy(buf, &psEnc->state_Fxx[ 1 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));

+            silk_memcpy(buf, &psEnc->state_Fxx[ 1 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));

             ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,

                 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );

-            silk_memcpy(psEnc->state_Fxx[ 1 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));

+            silk_memcpy(psEnc->state_Fxx[ 1 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));

             psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;

         } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {

             /* Combine left and right channels before resampling */

             for( n = 0; n < nSamplesFromInput; n++ ) {

-                buf[ n+delay ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ],  1 );

+                buf[ n + delay ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ],  1 );

             if(psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded==0) {

                for ( n = 0; n<MAX_ENCODER_DELAY; n++ )

                   psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ n ] = silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ n ]+(opus_int32)psEnc->state_Fxx[ 1 ].sCmn.delayBuf[ n ], 1);

-            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));

+            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));

             ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,

                 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );

             /* On the first mono frame, average the results for the two resampler states  */

@@ -281,17 +281,16 @@

                         silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ]

                                   + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1);

-            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));

+            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));

             psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;

         } else {

             silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );

-            silk_memcpy(buf+delay, samplesIn, nSamplesFromInput*sizeof(opus_int16));

-            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));

+            silk_memcpy(buf + delay, samplesIn, nSamplesFromInput*sizeof(opus_int16));

+            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));

             ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,

                 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );

-            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));

+            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));

             psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;

@@ -387,6 +386,22 @@

                 silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );

+            /* Reset side channel encoder memory for first frame with side coding */

+            if( encControl->nChannelsInternal == 2 && psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 && psEnc->prev_decode_only_middle == 1 ) {

+                silk_memset( &psEnc->state_Fxx[ 1 ].sShape,               0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );

+                silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt,             0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) );

+                silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ,            0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );

+                silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15,   0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );

+                silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );

+                silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.inputBuf,        0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.inputBuf ) );

+                psEnc->state_Fxx[ 1 ].sCmn.prevLag                = 100;

+                psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev           = 100;

+                psEnc->state_Fxx[ 1 ].sShape.LastGainIndex        = 10;

+                psEnc->state_Fxx[ 1 ].sCmn.prevSignalType         = TYPE_NO_VOICE_ACTIVITY;

+                psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_inv_gain_Q16 = 65536;

+            }

+            psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ];

             /* Encode */

             for( n = 0; n < encControl->nChannelsInternal; n++ ) {

                 if( encControl->nChannelsInternal == 1 ) {

@@ -450,6 +465,7 @@

             break;

     psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;

     encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch;

--- a/silk/fixed/structs_FIX.h

+++ b/silk/fixed/structs_FIX.h

@@ -123,6 +123,7 @@

     opus_int                 nPrevChannelsInternal;

     opus_int                 timeSinceSwitchAllowed_ms;

     opus_int                 allowBandwidthSwitch;

+    opus_int                     prev_decode_only_middle;

 } silk_encoder;

--- a/silk/float/structs_FLP.h

+++ b/silk/float/structs_FLP.h

@@ -121,6 +121,7 @@

     opus_int                     nPrevChannelsInternal;

     opus_int                     timeSinceSwitchAllowed_ms;

     opus_int                     allowBandwidthSwitch;

+    opus_int                     prev_decode_only_middle;

 } silk_encoder;

 #ifdef __cplusplus

--- a/silk/init_encoder.c

+++ b/silk/init_encoder.c

@@ -49,7 +49,7 @@

     psEnc->sCmn.variable_HP_smth1_Q15 = silk_LSHIFT( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ), 8 );

     psEnc->sCmn.variable_HP_smth2_Q15 = psEnc->sCmn.variable_HP_smth1_Q15;

-    /* Used to deactivate LSF interpolation, fluctuation reduction, pitch prediction */

+    /* Used to deactivate LSF interpolation, pitch prediction */

     psEnc->sCmn.first_frame_after_reset = 1;

     /* Initialize Silk VAD */

--- a/silk/main.h

+++ b/silk/main.h

@@ -363,9 +363,10 @@

);

 /* Set decoder sampling rate */

-void silk_decoder_set_fs(

+opus_int silk_decoder_set_fs(

     silk_decoder_state              *psDec,             /* I/O  Decoder state pointer                       */

-    opus_int                         fs_kHz              /* I    Sampling frequency (kHz)                    */

+    opus_int                         fs_kHz,             /* I    Sampling frequency (kHz)                    */

+    opus_int                         fs_API_Hz           /* I    API Sampling frequency (Hz)                 */

);

 /****************/

--- a/silk/stereo_LR_to_MS.c

+++ b/silk/stereo_LR_to_MS.c

@@ -87,7 +87,7 @@

     smooth_coef_Q16 = is10msFrame ?

         SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF / 2, 16 ) :

         SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF,     16 );

-    smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8 , prev_speech_act_Q8 ), smooth_coef_Q16 );

+    smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8, prev_speech_act_Q8 ), smooth_coef_Q16 );

     pred_Q13[ 0 ] = silk_stereo_find_predictor( &LP_ratio_Q14, LP_mid, LP_side, &state->mid_side_amp_Q0[ 0 ], frame_length, smooth_coef_Q16 );

     pred_Q13[ 1 ] = silk_stereo_find_predictor( &HP_ratio_Q14, HP_mid, HP_side, &state->mid_side_amp_Q0[ 2 ], frame_length, smooth_coef_Q16 );

@@ -168,8 +168,20 @@

         width_Q14 = state->smth_width_Q14;

-    if (*mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1)

-    {

+    /* Make sure to keep on encoding until the tapered output has been transmitted */

+    if( *mid_only_flag == 1 ) {

+        state->silent_side_len += frame_length - STEREO_INTERP_LEN_MS * fs_kHz;

+        if( state->silent_side_len < LA_SHAPE_MS * fs_kHz ) {

+            *mid_only_flag = 0;

+        } else {

+            /* Limit to avoid wrapping around */

+            state->silent_side_len = 10000;

+        }

+    } else {

+        state->silent_side_len = 0;

+    }

+    if( *mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1 ) {

         mid_side_rates_bps[ 1 ] = 1;

         mid_side_rates_bps[ 0 ] = silk_max_int( 1, total_rate_bps - mid_side_rates_bps[ 1 ]);

--- a/silk/stereo_find_predictor.c

+++ b/silk/stereo_find_predictor.c

@@ -42,26 +42,32 @@

     opus_int   scale, scale1, scale2;

-    opus_int32 nrgx, nrgy, corr, pred_Q13;

+    opus_int32 nrgx, nrgy, corr, pred_Q13, pred2_Q10;

     /* Find  predictor */

     silk_sum_sqr_shift( &nrgx, &scale1, x, length );

     silk_sum_sqr_shift( &nrgy, &scale2, y, length );

-    scale = silk_max( scale1, scale2 );

+    scale = silk_max_int( scale1, scale2 ) + 1;

     scale = scale + ( scale & 1 );          /* make even */

     nrgy = silk_RSHIFT32( nrgy, scale - scale2 );

     nrgx = silk_RSHIFT32( nrgx, scale - scale1 );

-    nrgx = silk_max( nrgx, 1 );

+    nrgx = silk_max_int( nrgx, 1 );

     corr = silk_inner_prod_aligned_scale( x, y, scale, length );

     pred_Q13 = silk_DIV32_varQ( corr, nrgx, 13 );

-    pred_Q13 = silk_SAT16( pred_Q13 );

+    pred_Q13 = silk_LIMIT( pred_Q13, -(1 << 14), 1 << 14 );

+    pred2_Q10 = silk_SMULWB( pred_Q13, pred_Q13 );

+    /* Faster update for signals with large prediction parameters */

+    smooth_coef_Q16 = (opus_int)silk_max_int( smooth_coef_Q16, silk_abs( pred2_Q10 ) );

     /* Smoothed mid and residual norms */

     silk_assert( smooth_coef_Q16 < 32768 );

     scale = silk_RSHIFT( scale, 1 );

     mid_res_amp_Q0[ 0 ] = silk_SMLAWB( mid_res_amp_Q0[ 0 ], silk_LSHIFT( silk_SQRT_APPROX( nrgx ), scale ) - mid_res_amp_Q0[ 0 ],

         smooth_coef_Q16 );

-    nrgy = silk_SUB_LSHIFT32( nrgy, silk_SMULWB( corr, pred_Q13 ), 3 );

+    /* Residual energy = nrgy - 2 * pred * corr + pred^2 * nrgx */

+    nrgy = silk_SUB_LSHIFT32( nrgy, silk_SMULWB( corr, pred_Q13 ), 3 + 1 );

+    nrgy = silk_ADD_LSHIFT32( nrgy, silk_SMULWB( nrgx, pred2_Q10 ), 6 );

     mid_res_amp_Q0[ 1 ] = silk_SMLAWB( mid_res_amp_Q0[ 1 ], silk_LSHIFT( silk_SQRT_APPROX( nrgy ), scale ) - mid_res_amp_Q0[ 1 ],

         smooth_coef_Q16 );

--- a/silk/structs.h

+++ b/silk/structs.h

@@ -101,6 +101,7 @@

     opus_int32                   mid_side_amp_Q0[ 4 ];

     opus_int16                   smth_width_Q14;

     opus_int16                   width_prev_Q14;

+    opus_int16                   silent_side_len;

     opus_int8                    predIx[ MAX_FRAMES_PER_PACKET ][ 2 ][ 3 ];

     opus_int8                    mid_only_flags[ MAX_FRAMES_PER_PACKET ];

 } stereo_enc_state;

@@ -172,7 +173,7 @@

     opus_int                         LTPQuantLowComplexity;          /* Flag for low complexity LTP quantization                             */

     opus_int                         mu_LTP_Q9;                      /* Rate-distortion tradeoff in LTP quantization                         */

     opus_int                         NLSF_MSVQ_Survivors;            /* Number of survivors in NLSF MSVQ                                     */

-    opus_int                         first_frame_after_reset;        /* Flag for deactivating NLSF interp. and fluc. reduction after resets  */

+    opus_int                         first_frame_after_reset;        /* Flag for deactivating NLSF interpolation and pitch prediction        */

     opus_int                         controlled_since_last_payload;  /* Flag for ensuring codec_control only runs once per packet            */

     opus_int                         warping_Q16;                    /* Warping parameter for warped noise shaping                           */

     opus_int                         useCBR;                         /* Flag to enable constant bitrate                                      */

@@ -263,7 +264,7 @@

     opus_int         lagPrev;                                    /* Previous Lag                                                         */

     opus_int8        LastGainIndex;                              /* Previous gain index                                                  */

     opus_int         fs_kHz;                                     /* Sampling frequency in kHz                                            */

-    opus_int32       prev_API_sampleRate;                        /* Previous API sample frequency (Hz)                                   */

+    opus_int32       fs_API_hz;                                  /* API sample frequency (Hz)                                            */

     opus_int         nb_subfr;                                   /* Number of 5 ms subframes in a frame                                  */

     opus_int         frame_length;                               /* Frame length (samples)                                               */

     opus_int         subfr_length;                               /* Subframe length (samples)                                            */

@@ -270,7 +271,7 @@

     opus_int         ltp_mem_length;                             /* Length of LTP memory                                                 */

     opus_int         LPC_order;                                  /* LPC order                                                            */

     opus_int16       prevNLSF_Q15[ MAX_LPC_ORDER ];              /* Used to interpolate LSFs                                             */

-    opus_int         first_frame_after_reset;                    /* Flag for deactivating NLSF interp. and fluc. reduction after resets  */

+    opus_int         first_frame_after_reset;                    /* Flag for deactivating NLSF interpolation                             */

     const opus_uint8 *pitch_lag_low_bits_iCDF;                   /* Pointer to iCDF table for low bits of pitch lag index                */

     const opus_uint8 *pitch_contour_iCDF;                        /* Pointer to iCDF table for pitch contour index                        */

--- a/src/opus_decoder.c

+++ b/src/opus_decoder.c

@@ -802,9 +802,7 @@

       if (st->prev_mode == MODE_CELT_ONLY)

          celt_decoder_ctl(celt_dec, OPUS_GET_PITCH(value));

       else

-         *value = ((silk_decoder_state*)silk_dec)->indices.signalType == TYPE_VOICED

-         ? ((silk_decoder_state*)silk_dec)->lagPrev*48/((silk_decoder_state*)silk_dec)->fs_kHz

-         : 0;

+         *value = st->DecControl.prevPitchLag;

    break;

    default: