ref: 888756691836ca8ce419a870a768f910330fb9d1
parent: 480ba7034810fec56625dcd9ceeeb01d75c8d755
author: Koen Vos <[email protected]>
date: Thu Oct 6 09:38:26 EDT 2011
SILK update: Simplifies mono/stereo switching in SILK. Fixes a quantization mismatch between encoder and decoder. Constrains the pitch lags in the same way in the encoder and decoder.
--- a/silk/decode_core.c
+++ b/silk/decode_core.c
@@ -91,10 +91,10 @@
/* Preload LPC coeficients to array on stack. Gives small performance gain */
silk_memcpy( A_Q12_tmp, A_Q12, psDec->LPC_order * sizeof( opus_int16 ) );
B_Q14 = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ];
- Gain_Q10 = silk_RSHIFT( psDecCtrl->Gains_Q16[ k ], 6 );
signalType = psDec->indices.signalType;
- inv_gain_Q16 = silk_INVERSE32_varQ( Gain_Q10, 26 );
+ Gain_Q10 = silk_RSHIFT( psDecCtrl->Gains_Q16[ k ], 6 );
+ inv_gain_Q16 = silk_INVERSE32_varQ( psDecCtrl->Gains_Q16[ k ], 32 );
inv_gain_Q16 = silk_min( inv_gain_Q16, silk_int16_MAX );
/* Calculate Gain adjustment factor */
--- a/silk/decode_pitch.c
+++ b/silk/decode_pitch.c
@@ -67,7 +67,7 @@
}
min_lag = silk_SMULBB( PE_MIN_LAG_MS, Fs_kHz );
- max_lag = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz );
+ max_lag = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz ) - 1;
lag = min_lag + lagIndex;
for( k = 0; k < nb_subfr; k++ ) {
--- a/silk/enc_API.c
+++ b/silk/enc_API.c
@@ -119,45 +119,7 @@
return ret;
}
-static void stereo_crossmix(const opus_int16 *in, opus_int16 *out, int channel, int len, int to_mono, int id)
-{
- int i;
- opus_int16 delta, g1, g2;
- const opus_int16 *x1, *x2;
- x1 = in+channel;
- x2 = in+(1-channel);
- g1 = to_mono ? 16384: 8192;
- g2 = to_mono ? 0 : 8192;
-
- /* We want to finish at 0.5 */
- delta = (16384+(len>>1))/(len);
- if (to_mono) {
- delta = -delta;
- }
-
- i=0;
- if (to_mono != 2)
- {
- if ( id==0 ) {
- for ( ; i < len>>1; i++ ) {
- out[ i ] = silk_RSHIFT_ROUND( silk_SMLABB( silk_SMULBB( x1[ 2*i ], g1 ), x2[ 2*i ], g2 ), 14 );
- g1 += delta;
- g2 -= delta;
- }
- }
- }
- if (to_mono) {
- for ( ; i < len; i++ ) {
- out[ i ] = silk_RSHIFT( (opus_int32)x1[ 2*i ] + (opus_int32)x2[ 2*i ], 1 );
- }
- } else {
- for ( ; i < len; i++ ) {
- out[ i ] = x1[ 2*i ];
- }
- }
-}
-
/**************************/
/* Encode frame with Silk */
/**************************/
@@ -268,18 +230,13 @@
/* Resample and write to buffer */
if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
- if ( encControl->toMono > 0) {
- stereo_crossmix( samplesIn, buf, 0, nSamplesFromInput, encControl->toMono, id );
- } else if( psEnc->nPrevChannelsInternal == 1 || encControl->toMono == -1 ) {
- stereo_crossmix( samplesIn, buf, 0, nSamplesFromInput, 0, id );
- } else {
- for( n = 0; n < nSamplesFromInput; n++ ) {
- buf[ n ] = samplesIn[ 2 * n ];
- }
+ for( n = 0; n < nSamplesFromInput; n++ ) {
+ buf[ n ] = samplesIn[ 2 * n ];
}
/* Making sure to start both resamplers from the same state when switching from mono to stereo */
- if(psEnc->nPrevChannelsInternal == 1 && id==0)
+ if(psEnc->nPrevChannelsInternal == 1 && id==0) {
silk_memcpy(&psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
+ }
ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
@@ -287,14 +244,8 @@
nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
- if ( encControl->toMono > 0) {
- stereo_crossmix( samplesIn, buf, 1, nSamplesFromInput, encControl->toMono, id );
- } else if( psEnc->nPrevChannelsInternal == 1 || encControl->toMono == -1) {
- stereo_crossmix( samplesIn, buf, 1, nSamplesFromInput, 0, id );
- } else {
- for( n = 0; n < nSamplesFromInput; n++ ) {
- buf[ n ] = samplesIn[ 2 * n + 1 ];
- }
+ for( n = 0; n < nSamplesFromInput; n++ ) {
+ buf[ n ] = samplesIn[ 2 * n + 1 ];
}
ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
@@ -394,9 +345,9 @@
if( encControl->nChannelsInternal == 2 ) {
silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ],
psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
- MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8,
+ MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
- if (!prefillFlag) {
+ if( !prefillFlag ) {
silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
}
@@ -437,8 +388,9 @@
flags = silk_LSHIFT( flags, 1 );
flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag;
}
- if (!prefillFlag)
+ if( !prefillFlag ) {
ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
+ }
/* Return zero bytes if all channels DTXed */
if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) {
--- a/silk/float/pitch_analysis_core_FLP.c
+++ b/silk/float/pitch_analysis_core_FLP.c
@@ -467,17 +467,19 @@
for( k = 0; k < nb_subfr; k++ ) {
pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+ pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, max_lag );
}
*lagIndex = (opus_int16)( lag_new - min_lag );
*contourIndex = (opus_int8)CBimax;
- } else {
+ } else { /* Fs_kHz == 8 */
/* Save Lags and correlation */
silk_assert( CCmax >= 0.0f );
*LTPCorr = (silk_float)sqrt( CCmax / nb_subfr ); /* Output normalized correlation */
for( k = 0; k < nb_subfr; k++ ) {
pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+ pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, max_lag_8kHz );
}
- *lagIndex = (opus_int16)( lag - min_lag );
+ *lagIndex = (opus_int16)( lag - min_lag_8kHz );
*contourIndex = (opus_int8)CBimax;
}
silk_assert( *lagIndex >= 0 );
--- a/silk/main.h
+++ b/silk/main.h
@@ -54,6 +54,7 @@
opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */
opus_int32 total_rate_bps, /* I Total bitrate */
opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */
+ opus_int toMono, /* I Last frame before a stereo->mono transition */
opus_int fs_kHz, /* I Sample rate (kHz) */
opus_int frame_length /* I Number of samples */
);
--- a/silk/pitch_analysis_core.c
+++ b/silk/pitch_analysis_core.c
@@ -558,15 +558,17 @@
for( k = 0; k < nb_subfr; k++ ) {
pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+ pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, max_lag );
}
*lagIndex = (opus_int16)( lag_new - min_lag);
*contourIndex = (opus_int8)CBimax;
- } else {
+ } else { /* Fs_kHz == 8 */
/* Save Lags and correlation */
CCmax = silk_max( CCmax, 0 );
*LTPCorr_Q15 = (opus_int)silk_SQRT_APPROX( silk_LSHIFT( CCmax, 13 ) ); /* Output normalized correlation */
for( k = 0; k < nb_subfr; k++ ) {
pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+ pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, max_lag_8kHz );
}
*lagIndex = (opus_int16)( lag - min_lag_8kHz );
*contourIndex = (opus_int8)CBimax;
--- a/silk/stereo_LR_to_MS.c
+++ b/silk/stereo_LR_to_MS.c
@@ -41,6 +41,7 @@
opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */
opus_int32 total_rate_bps, /* I Total bitrate */
opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */
+ opus_int toMono, /* I Last frame before a stereo->mono transition */
opus_int fs_kHz, /* I Sample rate (kHz) */
opus_int frame_length /* I Number of samples */
)
@@ -96,7 +97,7 @@
/* Determine bitrate distribution between mid and side, and possibly reduce stereo width */
total_rate_bps -= is10msFrame ? 1200 : 600; /* Subtract approximate bitrate for coding stereo parameters */
- if (total_rate_bps < 1 ) {
+ if( total_rate_bps < 1 ) {
total_rate_bps = 1;
}
min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 900 );
@@ -122,7 +123,13 @@
/* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */
*mid_only_flag = 0;
- if( state->width_prev_Q14 == 0 &&
+ if( toMono ) {
+ /* Last frame before stereo->mono transition; collapse stereo width */
+ width_Q14 = 0;
+ pred_Q13[ 0 ] = 0;
+ pred_Q13[ 1 ] = 0;
+ silk_stereo_quant_pred( pred_Q13, ix );
+ } else if( state->width_prev_Q14 == 0 &&
( 8 * total_rate_bps < 13 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.05, 14 ) ) )
{
/* Code as panned-mono; previous frame already had zero width */
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -486,21 +486,13 @@
}
#endif
- if (st->silk_mode.toMono==1 && st->stream_channels==2)
+ if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0)
{
- /* In case the encoder changes its mind on stereo->mono transition */
- st->silk_mode.toMono = -1;
- } else if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0)
- {
/* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */
- st->silk_mode.toMono=1;
+ st->silk_mode.toMono = 1;
st->stream_channels = 2;
- } else if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==1)
- {
- st->silk_mode.toMono=2;
- st->stream_channels = 2;
} else {
- st->silk_mode.toMono=0;
+ st->silk_mode.toMono = 0;
}
#ifdef FUZZING