ref: de3e16c858ac240303f1626b93d53a52b080c1a2
parent: 55788c8c857f28f3b5e6d14cab978398d79fcf24
author: Jean-Marc Valin <[email protected]>
date: Sun Oct 2 20:39:29 EDT 2011
Fixes stereo->mono switching bugs (encoder). Delays stereo->mono switching decisions so that SILK can do a smooth downmix. Also adds proper float/fixed code for the hybrid variable stereo collapse, including a smooth downmix for stereo<->mono switching.
--- a/silk/control.h
+++ b/silk/control.h
@@ -83,6 +83,9 @@
/* I: Flag to use constant bitrate */
opus_int useCBR;
+ /* I: Causes a smooth downmix to mono */
+ opus_int toMono;
+
/* O: Internal sampling rate used, in Hertz; 8000/12000/16000 */
opus_int32 internalSampleRate;
--- a/silk/enc_API.c
+++ b/silk/enc_API.c
@@ -119,6 +119,43 @@
return ret;
}
+/* Pulls one channel out of interleaved stereo input, cross-fading between that channel
+ * on its own and the average of both channels.
+ *
+ * in       interleaved stereo samples; in[ 2*i ] and in[ 2*i+1 ] form one sample pair
+ * out      receives len mono samples
+ * channel  0 or 1: the channel that is kept at full weight when no downmix is applied
+ * len      number of sample pairs to process; nothing is written when len <= 0
+ * to_mono  non-zero: fade from the selected channel towards the (x1+x2)/2 downmix;
+ *          zero: fade from the equal-weight mix back to the selected channel
+ * id       the fade is only applied when id is 0; for any other value every sample gets
+ *          the steady-state treatment (the callers in this file pass nFramesEncoded)
+ */
+static void stereo_crossmix(const opus_int16 *in, opus_int16 *out, int channel, int len, int to_mono, int id)
+{
+ int i;
+ opus_int16 delta, g1, g2;
+ const opus_int16 *x1, *x2;
+
+ /* Nothing to produce, and the step computation below would divide by zero */
+ if (len <= 0) {
+ return;
+ }
+
+ x1 = in+channel;
+ x2 = in+(1-channel);
+ /* Q14 gains (16384 == 1.0) at the start of the fade */
+ g1 = to_mono ? 16384: 8192;
+ g2 = to_mono ? 0 : 8192;
+
+ /* The fade lasts len>>1 samples and has to move each gain by 0.5 (8192 in Q14),
+ so the per-sample step is 16384/len, rounded to nearest */
+ delta = (16384+(len>>1))/(len);
+ if (to_mono) {
+ delta = -delta;
+ }
+
+ i=0;
+ if ( id==0 ) {
+ /* First half of the block: weighted sum of both channels with ramping gains */
+ for ( ; i < len>>1; i++ ) {
+ out[ i ] = silk_RSHIFT_ROUND( silk_SMLABB( silk_SMULBB( x1[ 2*i ], g1 ), x2[ 2*i ], g2 ), 14 );
+ g1 += delta;
+ g2 -= delta;
+ }
+ }
+ /* Remaining samples (all of them when id != 0) use the end-of-fade mix */
+ if (to_mono) {
+ for ( ; i < len; i++ ) {
+ out[ i ] = silk_RSHIFT( (opus_int32)x1[ 2*i ] + (opus_int32)x2[ 2*i ], 1 );
+ }
+ } else {
+ for ( ; i < len; i++ ) {
+ out[ i ] = x1[ 2*i ];
+ }
+ }
+}
+
/**************************/
/* Encode frame with Silk */
/**************************/
@@ -218,11 +255,18 @@
nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
/* Resample and write to buffer */
if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
- for( n = 0; n < nSamplesFromInput; n++ ) {
- buf[ n ] = samplesIn[ 2 * n ];
+ int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
+ if ( encControl->toMono ) {
+ stereo_crossmix( samplesIn, buf, 0, nSamplesFromInput, 1, id );
+ } else if( psEnc->nPrevChannelsInternal == 1 || encControl->toMono == -1 ) {
+ stereo_crossmix( samplesIn, buf, 0, nSamplesFromInput, 0, id );
+ } else {
+ for( n = 0; n < nSamplesFromInput; n++ ) {
+ buf[ n ] = samplesIn[ 2 * n ];
+ }
}
/* Making sure to start both resamplers from the same state when switching from mono to stereo */
- if(psEnc->nPrevChannelsInternal == 1)
+ if(psEnc->nPrevChannelsInternal == 1 && id==0)
silk_memcpy(&psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
@@ -231,8 +275,14 @@
nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
- for( n = 0; n < nSamplesFromInput; n++ ) {
- buf[ n ] = samplesIn[ 2 * n + 1 ];
+ if ( encControl->toMono ) {
+ stereo_crossmix( samplesIn, buf, 1, nSamplesFromInput, 1, id );
+ } else if( psEnc->nPrevChannelsInternal == 1 ) {
+ stereo_crossmix( samplesIn, buf, 1, nSamplesFromInput, 0, id );
+ } else {
+ for( n = 0; n < nSamplesFromInput; n++ ) {
+ buf[ n ] = samplesIn[ 2 * n + 1 ];
+ }
}
ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
@@ -251,7 +301,6 @@
&psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], samplesIn, nSamplesFromInput );
psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
}
- psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;
samplesIn += nSamplesFromInput * encControl->nChannelsAPI;
nSamplesIn -= nSamplesFromInput;
@@ -407,6 +456,7 @@
break;
}
}
+ psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;
encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch;
encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0;
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -76,6 +76,7 @@
opus_val32 hp_mem[4];
int mode;
int prev_mode;
+ int prev_channels;
int bandwidth;
/* Sampling rate (at the API level) */
int first;
@@ -328,6 +329,35 @@
#endif
}
+/* Reduces the stereo width of an interleaved signal by moving part of the half-difference
+ * between the first two channels of each frame from one channel to the other.
+ *
+ * in/out      interleaved input and output (the caller in this file passes the same buffer)
+ * g1, g2      width gains at the start and at the end of the transition; the amount that
+ *             is removed is Q15ONE minus the gain
+ * overlap     number of leading samples over which the gain moves from g1 to g2, shaped by
+ *             the squared window
+ * frame_size  total number of samples per channel to process
+ * channels    interleaving stride
+ * window      transition window, read with a stride of 48000/Fs
+ * Fs          sampling rate used to derive that stride
+ */
+static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
+ int overlap, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
+{
+ int n;
+ int win_step;
+ opus_val16 collapse_old, collapse_new;
+
+ win_step = 48000/Fs;
+ /* Work with the amount of side signal to remove rather than the width itself */
+ collapse_old = Q15ONE-g1;
+ collapse_new = Q15ONE-g2;
+
+ /* Transition region: blend the old and new collapse amounts with the squared window */
+ n = 0;
+ while (n<overlap)
+ {
+ const opus_val16 *src = in + n*channels;
+ opus_val16 *dst = out + n*channels;
+ opus_val32 side;
+ opus_val16 blend, win_sq;
+
+ win_sq = MULT16_16_Q15(window[n*win_step], window[n*win_step]);
+ blend = SHR32(MAC16_16(MULT16_16(win_sq,collapse_new),
+ Q15ONE-win_sq, collapse_old), 15);
+ side = EXTRACT16(HALF32((opus_val32)src[0] - (opus_val32)src[1]));
+ side = MULT16_16_Q15(blend, side);
+ dst[0] = dst[0] - side;
+ dst[1] = dst[1] + side;
+ n++;
+ }
+
+ /* Rest of the frame: constant collapse amount derived from g2 */
+ while (n<frame_size)
+ {
+ const opus_val16 *src = in + n*channels;
+ opus_val16 *dst = out + n*channels;
+ opus_val32 side;
+
+ side = EXTRACT16(HALF32((opus_val32)src[0] - (opus_val32)src[1]));
+ side = MULT16_16_Q15(collapse_new, side);
+ dst[0] = dst[0] - side;
+ dst[1] = dst[1] + side;
+ n++;
+ }
+}
+
OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int mode, int *error)
{
int ret;
@@ -442,6 +472,19 @@
}
#endif
+ if (st->silk_mode.toMono==1 && st->stream_channels==2)
+ {
+ /* In case the encoder changes its mind on stereo->mono transition */
+ st->silk_mode.toMono = -1;
+ } else if (st->stream_channels == 1 && st->prev_channels ==2 && !st->silk_mode.toMono)
+ {
+ /* Delay stereo->mono transition so that SILK can do a smooth downmix */
+ st->silk_mode.toMono=1;
+ st->stream_channels = 2;
+ } else {
+ st->silk_mode.toMono=0;
+ }
+
#ifdef FUZZING
/* Random mode switching */
if ((rand()&0xF)==0)
@@ -830,26 +873,26 @@
st->delay_buffer[i] = pcm_buf[(frame_size+delay_compensation-st->encoder_buffer)*st->channels+i];
- if( st->mode == MODE_HYBRID && st->stream_channels == 2 ) {
+ if (st->mode != MODE_HYBRID || st->stream_channels==1)
+ st->silk_mode.stereoWidth_Q14 = 1<<14;
+ if( st->channels == 2 ) {
/* Apply stereo width reduction (at low bitrates) */
if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) {
- int width_Q14, delta_Q14, nSamples_8ms, diff;
- nSamples_8ms = ( st->Fs * 8 ) / 1000;
- width_Q14 = (1 << 14 ) - st->hybrid_stereo_width_Q14;
- delta_Q14 = ( st->hybrid_stereo_width_Q14 - st->silk_mode.stereoWidth_Q14 ) / nSamples_8ms;
- for( i = 0; i < nSamples_8ms; i++ ) {
- width_Q14 += delta_Q14;
- diff = pcm_buf[ 2*i+1 ] - (opus_int32)pcm_buf[ 2*i ];
- diff = ( diff * width_Q14 ) >> 15;
- pcm_buf[ 2*i ] = (opus_int16)( pcm_buf[ 2*i ] + diff );
- pcm_buf[ 2*i+1 ] = (opus_int16)( pcm_buf[ 2*i+1 ] - diff );
- }
- for( ; i < frame_size; i++ ) {
- diff = pcm_buf[ 2*i+1 ] - (opus_int32)pcm_buf[ 2*i ];
- diff = ( diff * width_Q14 ) >> 15;
- pcm_buf[ 2*i ] = (opus_int16)( pcm_buf[ 2*i ] + diff );
- pcm_buf[ 2*i+1 ] = (opus_int16)( pcm_buf[ 2*i+1 ] - diff );
- }
+ opus_val16 g1, g2;
+ const CELTMode *celt_mode;
+
+ celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
+ /* Previous and current stereo widths, both still in Q14 (16384 == full width) */
+ g1 = st->hybrid_stereo_width_Q14;
+ g2 = st->silk_mode.stereoWidth_Q14;
+ /* Convert the Q14 widths to the gain format stereo_fade() works in: the fixed-point
+ build doubles them to Q15 (mapping full width to Q15ONE so the result stays in
+ range), the floating-point build rescales them by 1/16384 */
+#ifdef FIXED_POINT
+ g1 = g1==16384 ? Q15ONE : SHL16(g1,1);
+ g2 = g2==16384 ? Q15ONE : SHL16(g2,1);
+#else
+ g1 *= (1.f/16384);
+ g2 *= (1.f/16384);
+#endif
+ stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap,
+ frame_size, st->channels, celt_mode->window, st->Fs);
st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14;
}
}
@@ -944,6 +987,8 @@
st->prev_mode = MODE_CELT_ONLY;
else
st->prev_mode = st->mode;
+ st->prev_channels = st->stream_channels;
+
st->first = 0;
RESTORE_STACK;
return ret+1+redundancy_bytes;