ref: b24e57462724185f8922455a2196607f06b98e41
parent: a4885a5fd5165d4732929328de613a35a3d3b359
author: Jean-Marc Valin <[email protected]>
date: Tue Oct 11 17:09:14 EDT 2011
Misc bug fixes - There was a bug where the decoder resampler was not properly initialized when fs_kHz == API_fs_kHz. In that case the resampler would continue to upsample, and the output was corrupt. - The delay value in the decoder was taken from the state before it was potentially updated. This caused the decoder to apply the new delay value one frame late. - The encoder and decoder states are now updated more consistently when the sampling rate changes (pesq liked these changes). - The side channel encoder and decoder are now properly reset for the first frame with side coding active again. - Updating the "ratio" value faster in the LR_to_MS() code for large prediction values means that for certain extreme/artificial input signals the output looks better.
--- a/silk/control.h
+++ b/silk/control.h
@@ -117,6 +117,9 @@
/* I: Number of samples per packet in milliseconds; 10/20/40/60 */
opus_int payloadSize_ms;
+
+ /* O: Pitch lag of previous frame (0 if unvoiced), measured in samples at 48 kHz */
+ opus_int prevPitchLag;
} silk_DecControlStruct;
#ifdef __cplusplus
--- a/silk/control_codec.c
+++ b/silk/control_codec.c
@@ -153,6 +153,7 @@
} else {
/* Allocate worst case space for temporary upsampling, 8 to 48 kHz, so a factor 6 */
opus_int16 x_buf_API_fs_Hz[ ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * MAX_API_FS_KHZ ];
+ silk_resampler_state_struct temp_resampler_state;
#ifdef FIXED_POINT
opus_int16 *x_bufFIX = psEnc->x_buf;
#else
@@ -165,32 +166,21 @@
silk_float2short_array( x_bufFIX, psEnc->x_buf, nSamples_temp );
#endif
- if( silk_SMULBB( fs_kHz, 1000 ) < psEnc->sCmn.API_fs_Hz && psEnc->sCmn.fs_kHz != 0 ) {
- /* Resample buffered data in x_buf to API_fs_Hz */
+ /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */
+ ret += silk_resampler_init( &temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz );
- silk_resampler_state_struct temp_resampler_state;
+ /* Temporary resampling of x_buf data to API_fs_Hz */
+ ret += silk_resampler( &temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, nSamples_temp );
- /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */
- ret += silk_resampler_init( &temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz );
+ /* Calculate number of samples that has been temporarily upsampled */
+ nSamples_temp = silk_DIV32_16( nSamples_temp * psEnc->sCmn.API_fs_Hz, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ) );
- /* Temporary resampling of x_buf data to API_fs_Hz */
- ret += silk_resampler( &temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, nSamples_temp );
+ /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
+ ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ) );
- /* Calculate number of samples that has been temporarily upsampled */
- nSamples_temp = silk_DIV32_16( nSamples_temp * psEnc->sCmn.API_fs_Hz, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ) );
+ /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */
+ ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, nSamples_temp );
- /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
- ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ) );
-
- } else {
- /* Copy data */
- silk_memcpy( x_buf_API_fs_Hz, x_bufFIX, nSamples_temp * sizeof( opus_int16 ) );
- }
-
- if( 1000 * fs_kHz != psEnc->sCmn.API_fs_Hz ) {
- /* Correct resampler state (unless resampling by a factor 1) by resampling buffered data from API_fs_Hz to fs_kHz */
- ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, nSamples_temp );
- }
#ifndef FIXED_POINT
silk_short2float_array( psEnc->x_buf, x_bufFIX, ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * fs_kHz );
#endif
@@ -251,14 +241,9 @@
silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 );
if( psEnc->sCmn.fs_kHz != fs_kHz ) {
/* reset part of the state */
-#ifdef FIXED_POINT
- silk_memset( &psEnc->sShape, 0, sizeof( silk_shape_state_FIX ) );
- silk_memset( &psEnc->sPrefilt, 0, sizeof( silk_prefilter_state_FIX ) );
-#else
- silk_memset( &psEnc->sShape, 0, sizeof( silk_shape_state_FLP ) );
- silk_memset( &psEnc->sPrefilt, 0, sizeof( silk_prefilter_state_FLP ) );
-#endif
- silk_memset( &psEnc->sCmn.sNSQ, 0, sizeof( silk_nsq_state ) );
+ silk_memset( &psEnc->sShape, 0, sizeof( psEnc->sShape ) );
+ silk_memset( &psEnc->sPrefilt, 0, sizeof( psEnc->sPrefilt ) );
+ silk_memset( &psEnc->sCmn.sNSQ, 0, sizeof( psEnc->sCmn.sNSQ ) );
silk_memset( psEnc->sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) );
psEnc->sCmn.inputBufIx = 0;
@@ -272,6 +257,7 @@
psEnc->sShape.LastGainIndex = 10;
psEnc->sCmn.sNSQ.lagPrev = 100;
psEnc->sCmn.sNSQ.prev_inv_gain_Q16 = 65536;
+ psEnc->sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY;
psEnc->sCmn.fs_kHz = fs_kHz;
if( psEnc->sCmn.fs_kHz == 8 ) {
--- a/silk/create_init_destroy.c
+++ b/silk/create_init_destroy.c
@@ -42,7 +42,7 @@
/* Clear the entire encoder state, except anything copied */
silk_memset( psDec, 0, sizeof( silk_decoder_state ) );
- /* Used to deactivate e.g. LSF interpolation and fluctuation reduction */
+ /* Used to deactivate LSF interpolation */
psDec->first_frame_after_reset = 1;
psDec->prev_inv_gain_Q16 = 65536;
--- a/silk/dec_API.c
+++ b/silk/dec_API.c
@@ -31,14 +31,6 @@
#include "API.h"
#include "main.h"
-static const int dec_delay_matrix[3][5] = {
-/*SILK API 8 12 16 24 48 */
-/* 8 */ {3, 0, 2, 0, 0},
-/*12 */ {0, 8, 5, 7, 5},
-/*16 */ {0, 0, 8, 5, 5}
-};
-
-
/************************/
/* Decoder Super Struct */
/************************/
@@ -47,6 +39,7 @@
stereo_dec_state sStereo;
opus_int nChannelsAPI;
opus_int nChannelsInternal;
+ opus_int prev_decode_only_middle;
} silk_decoder;
/*********************/
@@ -88,7 +81,7 @@
opus_int32 *nSamplesOut /* O: Number of samples decoded */
)
{
- opus_int i, n, prev_fs_kHz, decode_only_middle = 0, ret = SILK_NO_ERROR;
+ opus_int i, n, delay, decode_only_middle = 0, ret = SILK_NO_ERROR;
opus_int32 nSamplesOutDec, LBRR_symbol;
opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 + MAX_DECODER_DELAY ];
opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ];
@@ -96,10 +89,7 @@
opus_int16 *resample_out_ptr;
silk_decoder *psDec = ( silk_decoder * )decState;
silk_decoder_state *channel_state = psDec->channel_state;
- int delay;
- delay = channel_state[ 0 ].delay;
-
/**********************************/
/* Test if first frame in payload */
/**********************************/
@@ -109,16 +99,9 @@
}
}
- /* Save previous sample frequency */
- prev_fs_kHz = channel_state[ 0 ].fs_kHz;
-
/* If Mono -> Stereo transition in bitstream: init state of second channel */
if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
ret += silk_init_decoder( &channel_state[ 1 ] );
- if( psDec->nChannelsAPI == 2 ) {
- silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
- silk_memcpy( &channel_state[ 1 ].delayBuf, &channel_state[ 0 ].delayBuf, MAX_DECODER_DELAY*sizeof(opus_int16));
- }
}
for( n = 0; n < decControl->nChannelsInternal; n++ ) {
@@ -149,24 +132,17 @@
silk_assert( 0 );
return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
}
- silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec );
+ ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
}
}
- /* Initialize resampler when switching internal or external sampling frequency */
- if( prev_fs_kHz != channel_state[ 0 ].fs_kHz || channel_state[ 0 ].prev_API_sampleRate != decControl->API_sampleRate ) {
- channel_state[ 0 ].delay = dec_delay_matrix[rateID(silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ))][rateID(decControl->API_sampleRate)];
- silk_assert(channel_state[ 0 ].delay <= MAX_DECODER_DELAY);
- ret = silk_resampler_init( &channel_state[ 0 ].resampler_state, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ), decControl->API_sampleRate );
- if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
- silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
- channel_state[ 1 ].delay = channel_state[ 0 ].delay;
- }
- }
- channel_state[ 0 ].prev_API_sampleRate = decControl->API_sampleRate;
+ delay = channel_state[ 0 ].delay;
+
if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
+ silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
+ silk_memcpy( &channel_state[ 1 ].delayBuf, &channel_state[ 0 ].delayBuf, sizeof(channel_state[ 0 ].delayBuf));
}
psDec->nChannelsAPI = decControl->nChannelsAPI;
psDec->nChannelsInternal = decControl->nChannelsInternal;
@@ -237,11 +213,20 @@
}
} else {
for( n = 0; n < 2; n++ ) {
- MS_pred_Q13[n] = psDec->sStereo.pred_prev_Q13[n];
+ MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];
}
}
}
+ /* Reset side channel decoder prediction memory for first frame with side coding */
+ if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
+ silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
+ silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
+ psDec->channel_state[ 1 ].lagPrev = 100;
+ psDec->channel_state[ 1 ].LastGainIndex = 10;
+ psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
+ }
+
/* Call decoder for one frame */
for( n = 0; n < decControl->nChannelsInternal; n++ ) {
if( n == 0 || decode_only_middle == 0 ) {
@@ -253,10 +238,10 @@
if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
/* Convert Mid/Side to Left/Right */
- silk_stereo_MS_to_LR( &psDec->sStereo, &samplesOut1_tmp[ 0 ][delay], &samplesOut1_tmp[ 1 ][delay], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
+ silk_stereo_MS_to_LR( &psDec->sStereo, &samplesOut1_tmp[ 0 ][ delay ], &samplesOut1_tmp[ 1 ][ delay ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
} else {
/* Buffering */
- silk_memcpy( &samplesOut1_tmp[ 0 ][delay], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
+ silk_memcpy( &samplesOut1_tmp[ 0 ][ delay ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec + delay ], 2 * sizeof( opus_int16 ) );
}
@@ -272,10 +257,10 @@
for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
- silk_memcpy(&samplesOut1_tmp[ n ][ 1 ], &channel_state[ n ].delayBuf[ MAX_DECODER_DELAY-delay ], delay*sizeof(opus_int16));
+ silk_memcpy(&samplesOut1_tmp[ n ][ 1 ], &channel_state[ n ].delayBuf[ MAX_DECODER_DELAY - delay ], delay * sizeof(opus_int16));
/* Resample decoded signal to API_sampleRate */
ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
- silk_memcpy(channel_state[ n ].delayBuf, &samplesOut1_tmp[ n ][ 1 + nSamplesOutDec + delay - MAX_DECODER_DELAY ], MAX_DECODER_DELAY*sizeof(opus_int16));
+ silk_memcpy(channel_state[ n ].delayBuf, &samplesOut1_tmp[ n ][ 1 + nSamplesOutDec + delay - MAX_DECODER_DELAY ], MAX_DECODER_DELAY * sizeof(opus_int16));
/* Interleave if stereo output and stereo stream */
if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
@@ -291,6 +276,16 @@
samplesOut[ 0 + 2 * i ] = samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
}
}
+
+ /* Export pitch lag, measured at 48 kHz sampling rate */
+ if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
+ int mult_tab[ 3 ] = { 6, 4, 3 };
+ decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
+ } else {
+ decControl->prevPitchLag = 0;
+ }
+
+ psDec->prev_decode_only_middle = decode_only_middle;
return ret;
}
--- a/silk/decoder_set_fs.c
+++ b/silk/decoder_set_fs.c
@@ -31,24 +31,59 @@
#include "main.h"
+static const int dec_delay_matrix[3][5] = {
+/*SILK API 8 12 16 24 48 */
+/* 8 */ {3, 0, 2, 0, 0},
+/*12 */ {0, 8, 5, 7, 5},
+/*16 */ {0, 0, 8, 5, 5}
+};
+
/* Set decoder sampling rate */
-void silk_decoder_set_fs(
+opus_int silk_decoder_set_fs(
silk_decoder_state *psDec, /* I/O Decoder state pointer */
- opus_int fs_kHz /* I Sampling frequency (kHz) */
+ opus_int fs_kHz, /* I Sampling frequency (kHz) */
+ opus_int fs_API_Hz /* I API Sampling frequency (Hz) */
)
{
- opus_int frame_length;
+ opus_int frame_length, ret = 0;
silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
silk_assert( psDec->nb_subfr == MAX_NB_SUBFR || psDec->nb_subfr == MAX_NB_SUBFR/2 );
+ /* New (sub)frame length */
psDec->subfr_length = silk_SMULBB( SUB_FRAME_LENGTH_MS, fs_kHz );
frame_length = silk_SMULBB( psDec->nb_subfr, psDec->subfr_length );
+
+ /* Initialize resampler when switching internal or external sampling frequency */
+ if( psDec->fs_kHz != fs_kHz || psDec->fs_API_hz != fs_API_Hz ) {
+ /* Allocate worst case space for temporary upsampling, 8 to 48 kHz, so a factor 6 */
+ opus_int16 temp_buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ ];
+ silk_resampler_state_struct temp_resampler_state;
+
+ /* New delay value */
+ psDec->delay = dec_delay_matrix[ rateID( silk_SMULBB( fs_kHz, 1000 ) ) ][ rateID( fs_API_Hz ) ];
+ silk_assert( psDec->delay <= MAX_DECODER_DELAY );
+
+ if( psDec->fs_kHz != fs_kHz && psDec->fs_kHz > 0 ) {
+ /* Initialize resampler for temporary resampling of outBuf data to the new internal sampling rate */
+ ret += silk_resampler_init( &temp_resampler_state, silk_SMULBB( psDec->fs_kHz, 1000 ), silk_SMULBB( fs_kHz, 1000 ) );
+
+ /* Temporary resampling of outBuf data to the new internal sampling rate */
+ silk_memcpy( temp_buf, psDec->outBuf, psDec->frame_length * sizeof( opus_int16 ) );
+ ret += silk_resampler( &temp_resampler_state, psDec->outBuf, temp_buf, psDec->frame_length );
+ }
+
+ /* Initialize the resampler for dec_API.c preparing resampling from fs_kHz to API_fs_Hz */
+ ret += silk_resampler_init( &psDec->resampler_state, silk_SMULBB( fs_kHz, 1000 ), fs_API_Hz );
+
+ /* Correct resampler state by resampling buffered data from fs_kHz to API_fs_Hz */
+ ret += silk_resampler( &psDec->resampler_state, temp_buf, psDec->outBuf, frame_length );
+
+ psDec->fs_API_hz = fs_API_Hz;
+ }
+
if( psDec->fs_kHz != fs_kHz || frame_length != psDec->frame_length ) {
- psDec->fs_kHz = fs_kHz;
- psDec->frame_length = frame_length;
- psDec->ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz );
- if( psDec->fs_kHz == 8 ) {
+ if( fs_kHz == 8 ) {
if( psDec->nb_subfr == MAX_NB_SUBFR ) {
psDec->pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
} else {
@@ -61,40 +96,38 @@
psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
}
}
- if( psDec->fs_kHz == 8 || psDec->fs_kHz == 12 ) {
- psDec->LPC_order = MIN_LPC_ORDER;
- psDec->psNLSF_CB = &silk_NLSF_CB_NB_MB;
- } else {
- psDec->LPC_order = MAX_LPC_ORDER;
- psDec->psNLSF_CB = &silk_NLSF_CB_WB;
+ if( psDec->fs_kHz != fs_kHz ) {
+ psDec->ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz );
+ if( fs_kHz == 8 || fs_kHz == 12 ) {
+ psDec->LPC_order = MIN_LPC_ORDER;
+ psDec->psNLSF_CB = &silk_NLSF_CB_NB_MB;
+ } else {
+ psDec->LPC_order = MAX_LPC_ORDER;
+ psDec->psNLSF_CB = &silk_NLSF_CB_WB;
+ }
+ if( fs_kHz == 16 ) {
+ psDec->pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;
+ } else if( fs_kHz == 12 ) {
+ psDec->pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
+ } else if( fs_kHz == 8 ) {
+ psDec->pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
+ } else {
+ /* unsupported sampling rate */
+ silk_assert( 0 );
+ }
+ psDec->first_frame_after_reset = 1;
+ psDec->lagPrev = 100;
+ psDec->LastGainIndex = 10;
+ psDec->prevSignalType = TYPE_NO_VOICE_ACTIVITY;
}
- if( psDec->fs_kHz != fs_kHz)
- {
- /* Reset part of the decoder state */
- silk_memset( psDec->sLPC_Q14_buf, 0, sizeof( psDec->sLPC_Q14_buf ) );
- silk_memset( psDec->outBuf, 0, MAX_FRAME_LENGTH * sizeof( opus_int16 ) );
- silk_memset( psDec->prevNLSF_Q15, 0, sizeof( psDec->prevNLSF_Q15 ) );
- }
- psDec->lagPrev = 100;
- psDec->LastGainIndex = 10;
- psDec->prevSignalType = TYPE_NO_VOICE_ACTIVITY;
- if( psDec->fs_kHz != fs_kHz)
- psDec->first_frame_after_reset = 1;
-
- if( fs_kHz == 16 ) {
- psDec->pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;
- } else if( fs_kHz == 12 ) {
- psDec->pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
- } else if( fs_kHz == 8 ) {
- psDec->pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
- } else {
- /* unsupported sampling rate */
- silk_assert( 0 );
- }
+ psDec->fs_kHz = fs_kHz;
+ psDec->frame_length = frame_length;
}
/* Check that settings are valid */
silk_assert( psDec->frame_length > 0 && psDec->frame_length <= MAX_FRAME_LENGTH );
+
+ return ret;
}
--- a/silk/define.h
+++ b/silk/define.h
@@ -214,12 +214,12 @@
#define NLSF_QUANT_DEL_DEC_STATES ( 1 << NLSF_QUANT_DEL_DEC_STATES_LOG2 )
/* Transition filtering for mode switching */
-# define TRANSITION_TIME_MS 5120 /* 5120 = 64 * FRAME_LENGTH_MS * ( TRANSITION_INT_NUM - 1 ) = 64*(20*4)*/
-# define TRANSITION_NB 3 /* Hardcoded in tables */
-# define TRANSITION_NA 2 /* Hardcoded in tables */
-# define TRANSITION_INT_NUM 5 /* Hardcoded in tables */
-# define TRANSITION_FRAMES ( TRANSITION_TIME_MS / MAX_FRAME_LENGTH_MS ) /* todo: needs to be made flexible for 10 ms frames*/
-# define TRANSITION_INT_STEPS ( TRANSITION_FRAMES / ( TRANSITION_INT_NUM - 1 ) )
+#define TRANSITION_TIME_MS 5120 /* 5120 = 64 * FRAME_LENGTH_MS * ( TRANSITION_INT_NUM - 1 ) = 64*(20*4)*/
+#define TRANSITION_NB 3 /* Hardcoded in tables */
+#define TRANSITION_NA 2 /* Hardcoded in tables */
+#define TRANSITION_INT_NUM 5 /* Hardcoded in tables */
+#define TRANSITION_FRAMES ( TRANSITION_TIME_MS / MAX_FRAME_LENGTH_MS )
+#define TRANSITION_INT_STEPS ( TRANSITION_FRAMES / ( TRANSITION_INT_NUM - 1 ) )
/* BWE factors to apply after packet loss */
#define BWE_AFTER_LOSS_Q16 63570
--- a/silk/enc_API.c
+++ b/silk/enc_API.c
@@ -237,13 +237,13 @@
for( n = 0; n < nSamplesFromInput; n++ ) {
buf[ n+delay ] = samplesIn[ 2 * n ];
}
- silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));
+ silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
/* Making sure to start both resamplers from the same state when switching from mono to stereo */
if(psEnc->nPrevChannelsInternal == 1 && id==0) {
silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.delayBuf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf, MAX_ENCODER_DELAY*sizeof(opus_int16));
}
- silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
+ silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
@@ -252,24 +252,24 @@
nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
for( n = 0; n < nSamplesFromInput; n++ ) {
- buf[ n+delay ] = samplesIn[ 2 * n + 1 ];
+ buf[ n + delay ] = samplesIn[ 2 * n + 1 ];
}
- silk_memcpy(buf, &psEnc->state_Fxx[ 1 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));
+ silk_memcpy(buf, &psEnc->state_Fxx[ 1 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
- silk_memcpy(psEnc->state_Fxx[ 1 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
+ silk_memcpy(psEnc->state_Fxx[ 1 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;
} else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {
/* Combine left and right channels before resampling */
for( n = 0; n < nSamplesFromInput; n++ ) {
- buf[ n+delay ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ], 1 );
+ buf[ n + delay ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ], 1 );
}
if(psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded==0) {
for ( n = 0; n<MAX_ENCODER_DELAY; n++ )
psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ n ] = silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ n ]+(opus_int32)psEnc->state_Fxx[ 1 ].sCmn.delayBuf[ n ], 1);
}
- silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));
+ silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
/* On the first mono frame, average the results for the two resampler states */
@@ -281,17 +281,16 @@
silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ]
+ psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1);
}
-
}
- silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
+ silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
} else {
silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
- silk_memcpy(buf+delay, samplesIn, nSamplesFromInput*sizeof(opus_int16));
- silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));
+ silk_memcpy(buf + delay, samplesIn, nSamplesFromInput*sizeof(opus_int16));
+ silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[ MAX_ENCODER_DELAY - delay ], delay * sizeof(opus_int16));
ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
- silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
+ silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf + nSamplesFromInput + delay - MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));
psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
}
@@ -387,6 +386,22 @@
silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
}
+ /* Reset side channel encoder memory for first frame with side coding */
+ if( encControl->nChannelsInternal == 2 && psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 && psEnc->prev_decode_only_middle == 1 ) {
+ silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
+ silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt, 0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) );
+ silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
+ silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
+ silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
+ silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.inputBuf, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.inputBuf ) );
+ psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100;
+ psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100;
+ psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10;
+ psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY;
+ psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_inv_gain_Q16 = 65536;
+ }
+ psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ];
+
/* Encode */
for( n = 0; n < encControl->nChannelsInternal; n++ ) {
if( encControl->nChannelsInternal == 1 ) {
@@ -450,6 +465,7 @@
break;
}
}
+
psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;
encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch;
--- a/silk/fixed/structs_FIX.h
+++ b/silk/fixed/structs_FIX.h
@@ -123,6 +123,7 @@
opus_int nPrevChannelsInternal;
opus_int timeSinceSwitchAllowed_ms;
opus_int allowBandwidthSwitch;
+ opus_int prev_decode_only_middle;
} silk_encoder;
--- a/silk/float/structs_FLP.h
+++ b/silk/float/structs_FLP.h
@@ -121,6 +121,7 @@
opus_int nPrevChannelsInternal;
opus_int timeSinceSwitchAllowed_ms;
opus_int allowBandwidthSwitch;
+ opus_int prev_decode_only_middle;
} silk_encoder;
#ifdef __cplusplus
--- a/silk/init_encoder.c
+++ b/silk/init_encoder.c
@@ -49,7 +49,7 @@
psEnc->sCmn.variable_HP_smth1_Q15 = silk_LSHIFT( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ), 8 );
psEnc->sCmn.variable_HP_smth2_Q15 = psEnc->sCmn.variable_HP_smth1_Q15;
- /* Used to deactivate LSF interpolation, fluctuation reduction, pitch prediction */
+ /* Used to deactivate LSF interpolation, pitch prediction */
psEnc->sCmn.first_frame_after_reset = 1;
/* Initialize Silk VAD */
--- a/silk/main.h
+++ b/silk/main.h
@@ -363,9 +363,10 @@
);
/* Set decoder sampling rate */
-void silk_decoder_set_fs(
+opus_int silk_decoder_set_fs(
silk_decoder_state *psDec, /* I/O Decoder state pointer */
- opus_int fs_kHz /* I Sampling frequency (kHz) */
+ opus_int fs_kHz, /* I Sampling frequency (kHz) */
+ opus_int fs_API_Hz /* I API Sampling frequency (Hz) */
);
/****************/
--- a/silk/stereo_LR_to_MS.c
+++ b/silk/stereo_LR_to_MS.c
@@ -87,7 +87,7 @@
smooth_coef_Q16 = is10msFrame ?
SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF / 2, 16 ) :
SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF, 16 );
- smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8 , prev_speech_act_Q8 ), smooth_coef_Q16 );
+ smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8, prev_speech_act_Q8 ), smooth_coef_Q16 );
pred_Q13[ 0 ] = silk_stereo_find_predictor( &LP_ratio_Q14, LP_mid, LP_side, &state->mid_side_amp_Q0[ 0 ], frame_length, smooth_coef_Q16 );
pred_Q13[ 1 ] = silk_stereo_find_predictor( &HP_ratio_Q14, HP_mid, HP_side, &state->mid_side_amp_Q0[ 2 ], frame_length, smooth_coef_Q16 );
@@ -168,8 +168,20 @@
width_Q14 = state->smth_width_Q14;
}
- if (*mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1)
- {
+ /* Make sure to keep on encoding until the tapered output has been transmitted */
+ if( *mid_only_flag == 1 ) {
+ state->silent_side_len += frame_length - STEREO_INTERP_LEN_MS * fs_kHz;
+ if( state->silent_side_len < LA_SHAPE_MS * fs_kHz ) {
+ *mid_only_flag = 0;
+ } else {
+ /* Limit to avoid wrapping around */
+ state->silent_side_len = 10000;
+ }
+ } else {
+ state->silent_side_len = 0;
+ }
+
+ if( *mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1 ) {
mid_side_rates_bps[ 1 ] = 1;
mid_side_rates_bps[ 0 ] = silk_max_int( 1, total_rate_bps - mid_side_rates_bps[ 1 ]);
}
--- a/silk/stereo_find_predictor.c
+++ b/silk/stereo_find_predictor.c
@@ -42,26 +42,32 @@
)
{
opus_int scale, scale1, scale2;
- opus_int32 nrgx, nrgy, corr, pred_Q13;
+ opus_int32 nrgx, nrgy, corr, pred_Q13, pred2_Q10;
/* Find predictor */
silk_sum_sqr_shift( &nrgx, &scale1, x, length );
silk_sum_sqr_shift( &nrgy, &scale2, y, length );
- scale = silk_max( scale1, scale2 );
+ scale = silk_max_int( scale1, scale2 ) + 1;
scale = scale + ( scale & 1 ); /* make even */
nrgy = silk_RSHIFT32( nrgy, scale - scale2 );
nrgx = silk_RSHIFT32( nrgx, scale - scale1 );
- nrgx = silk_max( nrgx, 1 );
+ nrgx = silk_max_int( nrgx, 1 );
corr = silk_inner_prod_aligned_scale( x, y, scale, length );
pred_Q13 = silk_DIV32_varQ( corr, nrgx, 13 );
- pred_Q13 = silk_SAT16( pred_Q13 );
+ pred_Q13 = silk_LIMIT( pred_Q13, -(1 << 14), 1 << 14 );
+ pred2_Q10 = silk_SMULWB( pred_Q13, pred_Q13 );
+ /* Faster update for signals with large prediction parameters */
+ smooth_coef_Q16 = (opus_int)silk_max_int( smooth_coef_Q16, silk_abs( pred2_Q10 ) );
+
/* Smoothed mid and residual norms */
silk_assert( smooth_coef_Q16 < 32768 );
scale = silk_RSHIFT( scale, 1 );
mid_res_amp_Q0[ 0 ] = silk_SMLAWB( mid_res_amp_Q0[ 0 ], silk_LSHIFT( silk_SQRT_APPROX( nrgx ), scale ) - mid_res_amp_Q0[ 0 ],
smooth_coef_Q16 );
- nrgy = silk_SUB_LSHIFT32( nrgy, silk_SMULWB( corr, pred_Q13 ), 3 );
+ /* Residual energy = nrgy - 2 * pred * corr + pred^2 * nrgx */
+ nrgy = silk_SUB_LSHIFT32( nrgy, silk_SMULWB( corr, pred_Q13 ), 3 + 1 );
+ nrgy = silk_ADD_LSHIFT32( nrgy, silk_SMULWB( nrgx, pred2_Q10 ), 6 );
mid_res_amp_Q0[ 1 ] = silk_SMLAWB( mid_res_amp_Q0[ 1 ], silk_LSHIFT( silk_SQRT_APPROX( nrgy ), scale ) - mid_res_amp_Q0[ 1 ],
smooth_coef_Q16 );
--- a/silk/structs.h
+++ b/silk/structs.h
@@ -101,6 +101,7 @@
opus_int32 mid_side_amp_Q0[ 4 ];
opus_int16 smth_width_Q14;
opus_int16 width_prev_Q14;
+ opus_int16 silent_side_len;
opus_int8 predIx[ MAX_FRAMES_PER_PACKET ][ 2 ][ 3 ];
opus_int8 mid_only_flags[ MAX_FRAMES_PER_PACKET ];
} stereo_enc_state;
@@ -172,7 +173,7 @@
opus_int LTPQuantLowComplexity; /* Flag for low complexity LTP quantization */
opus_int mu_LTP_Q9; /* Rate-distortion tradeoff in LTP quantization */
opus_int NLSF_MSVQ_Survivors; /* Number of survivors in NLSF MSVQ */
- opus_int first_frame_after_reset; /* Flag for deactivating NLSF interp. and fluc. reduction after resets */
+ opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation, pitch prediction */
opus_int controlled_since_last_payload; /* Flag for ensuring codec_control only runs once per packet */
opus_int warping_Q16; /* Warping parameter for warped noise shaping */
opus_int useCBR; /* Flag to enable constant bitrate */
@@ -263,7 +264,7 @@
opus_int lagPrev; /* Previous Lag */
opus_int8 LastGainIndex; /* Previous gain index */
opus_int fs_kHz; /* Sampling frequency in kHz */
- opus_int32 prev_API_sampleRate; /* Previous API sample frequency (Hz) */
+ opus_int32 fs_API_hz; /* API sample frequency (Hz) */
opus_int nb_subfr; /* Number of 5 ms subframes in a frame */
opus_int frame_length; /* Frame length (samples) */
opus_int subfr_length; /* Subframe length (samples) */
@@ -270,7 +271,7 @@
opus_int ltp_mem_length; /* Length of LTP memory */
opus_int LPC_order; /* LPC order */
opus_int16 prevNLSF_Q15[ MAX_LPC_ORDER ]; /* Used to interpolate LSFs */
- opus_int first_frame_after_reset; /* Flag for deactivating NLSF interp. and fluc. reduction after resets */
+ opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation */
const opus_uint8 *pitch_lag_low_bits_iCDF; /* Pointer to iCDF table for low bits of pitch lag index */
const opus_uint8 *pitch_contour_iCDF; /* Pointer to iCDF table for pitch contour index */
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -802,9 +802,7 @@
if (st->prev_mode == MODE_CELT_ONLY)
celt_decoder_ctl(celt_dec, OPUS_GET_PITCH(value));
else
- *value = ((silk_decoder_state*)silk_dec)->indices.signalType == TYPE_VOICED
- ? ((silk_decoder_state*)silk_dec)->lagPrev*48/((silk_decoder_state*)silk_dec)->fs_kHz
- : 0;
+ *value = st->DecControl.prevPitchLag;
}
break;
default: