shithub: opus

--- a/silk/control.h

+++ b/silk/control.h

@@ -83,6 +83,9 @@

     /* I:   Flag to use constant bitrate                                                    */

     opus_int useCBR;

+    /* I:   Causes a smooth downmix to mono */

+    opus_int toMono;

     /* O:   Internal sampling rate used, in Hertz; 8000/12000/16000                         */

     opus_int32 internalSampleRate;

--- a/silk/enc_API.c

+++ b/silk/enc_API.c

@@ -119,6 +119,43 @@

     return ret;

+/* Extracts one channel from an interleaved stereo buffer while cross-fading
+ * between that channel and an equal-weight mix of both channels.
+ *
+ *   in       interleaved stereo input; sample i of the selected channel is in[ 2*i + channel ]
+ *   out      receives len output samples
+ *   channel  0 or 1: the channel that out tracks when not mixed down
+ *   len      number of output samples to produce
+ *   to_mono  non-zero: ramp from the selected channel towards the half/half mix;
+ *            zero:     ramp from the half/half mix towards the selected channel
+ *   id       the gain ramp is applied only when id == 0; for any other value the
+ *            whole buffer gets the steady-state result (mix if to_mono, plain copy otherwise)
+ *
+ * Gains are Q14 (the weighted sum is shifted right by 14): 16384 is unity and
+ * 8192 is one half. The ramp spans the first len/2 samples.
+ */
+static void stereo_crossmix(const opus_int16 *in, opus_int16 *out, int channel, int len, int to_mono, int id)
+{
+   int i;
+   opus_int16                            delta, g1, g2;
+   const opus_int16                     *x1, *x2;
+
+   /* Nothing to write for an empty buffer, and the ramp step below divides by len */
+   if ( len <= 0 ) {
+      return;
+   }
+
+   /* x1 walks the selected channel, x2 the other one */
+   x1 = in+channel;
+   x2 = in+(1-channel);
+   g1 = to_mono ? 16384: 8192;
+   g2 = to_mono ? 0 : 8192;
+   /* We want to finish at 0.5 */
+   delta = (16384+(len>>1))/(len);
+   if (to_mono) {
+      delta = -delta;
+   }
+   i=0;
+   if ( id==0 ) {
+      /* Ramped part: g1 weights the selected channel, g2 the other one */
+      for ( ; i < len>>1; i++ ) {
+         out[ i ] = silk_RSHIFT_ROUND( silk_SMLABB( silk_SMULBB( x1[ 2*i ], g1 ), x2[ 2*i ], g2 ), 14 );
+         g1 += delta;
+         g2 -= delta;
+      }
+   }
+   if (to_mono) {
+      /* Steady state after the ramp: plain average of both channels */
+      for ( ; i < len; i++ ) {
+         out[ i ] = silk_RSHIFT( (opus_int32)x1[ 2*i ] + (opus_int32)x2[ 2*i ], 1 );
+      }
+   } else {
+      /* Steady state after the ramp: the selected channel unchanged */
+      for ( ; i < len; i++ ) {
+         out[ i ] = x1[ 2*i ];
+      }
+   }
+}

 /**************************/

 /* Encode frame with Silk */

 /**************************/

@@ -218,11 +255,18 @@

         nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );

         /* Resample and write to buffer */

         if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {

-            for( n = 0; n < nSamplesFromInput; n++ ) {

-                buf[ n ] = samplesIn[ 2 * n ];

+            int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
+            /* Pick how channel 0 is taken from the interleaved input: cross-mix towards mono,
+             * cross-mix back towards stereo, or a plain de-interleave.
+             * NOTE(review): the toMono == -1 test in the else-if below can never be true,
+             * because any non-zero toMono (including -1) is already taken by the first branch.
+             * If -1 is meant to select the to_mono=0 cross-mix, the first test presumably needs
+             * to be toMono == 1 -- confirm, and check the corresponding channel-1 selection too. */
+            if ( encControl->toMono ) {
+                stereo_crossmix( samplesIn, buf, 0, nSamplesFromInput, 1, id );
+            } else if( psEnc->nPrevChannelsInternal == 1 || encControl->toMono == -1 ) {
+                stereo_crossmix( samplesIn, buf, 0, nSamplesFromInput, 0, id );
+            } else {
+                for( n = 0; n < nSamplesFromInput; n++ ) {
+                    buf[ n ] = samplesIn[ 2 * n ];
+                }

             /* Making sure to start both resamplers from the same state when switching from mono to stereo */

-            if(psEnc->nPrevChannelsInternal == 1)

+            if(psEnc->nPrevChannelsInternal == 1 && id==0)

                silk_memcpy(&psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));

             ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,

@@ -231,8 +275,14 @@

             nSamplesToBuffer  = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;

             nSamplesToBuffer  = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );

-            for( n = 0; n < nSamplesFromInput; n++ ) {

-                buf[ n ] = samplesIn[ 2 * n + 1 ];

+            if ( encControl->toMono ) {

+                stereo_crossmix( samplesIn, buf, 1, nSamplesFromInput, 1, id );

+            } else if( psEnc->nPrevChannelsInternal == 1 ) {

+                stereo_crossmix( samplesIn, buf, 1, nSamplesFromInput, 0, id );

+            } else {

+                for( n = 0; n < nSamplesFromInput; n++ ) {

+                    buf[ n ] = samplesIn[ 2 * n + 1 ];

+                }

             ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,

                 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );

@@ -251,7 +301,6 @@

                 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], samplesIn, nSamplesFromInput );

             psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;

-        psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;

         samplesIn  += nSamplesFromInput * encControl->nChannelsAPI;

         nSamplesIn -= nSamplesFromInput;

@@ -407,6 +456,7 @@

             break;

+    psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;

     encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch;

     encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0;

--- a/src/opus_encoder.c

+++ b/src/opus_encoder.c

@@ -76,6 +76,7 @@

     opus_val32   hp_mem[4];

     int          mode;

     int          prev_mode;

+    int          prev_channels;

     int          bandwidth;

     /* Sampling rate (at the API level) */

     int          first;

@@ -328,6 +329,35 @@

 #endif

+/* Reduces the stereo width of an interleaved signal, fading the width from g1
+ * to g2 over the start of the frame and holding g2 for the remainder.
+ *
+ *   in          interleaved input; channels 0 and 1 of each sample are read
+ *   out         interleaved output; it is updated in place (out -= / += diff), so it
+ *               must already hold the signal to be narrowed (the caller passes in == out)
+ *   g1, g2      stereo width at the start / end of the fade, where Q15ONE leaves the
+ *               signal untouched and 0 replaces both channels by their average
+ *   overlap     fade length, expressed in window entries
+ *   frame_size  number of samples per channel in the frame
+ *   channels    sample stride of the interleaved buffers
+ *   window      fade window; it is squared and used as the blend weight between g1 and g2
+ *   Fs          sampling rate in Hz; the window is read with a stride of 48000/Fs
+ */
+static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
+        int overlap, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
+{
+    int i;
+    int inc = 48000/Fs;
+    /* The window is read at i*inc, so only overlap/inc steps stay inside its first
+     * `overlap` entries. NOTE(review): assumes window holds `overlap` entries at the
+     * 48 kHz rate, as the caller's celt_mode->overlap / celt_mode->window pair suggests. */
+    if (inc > 1)
+       overlap = overlap/inc;
+    /* Turn widths into the fraction of the side signal to remove */
+    g1 = Q15ONE-g1;
+    g2 = Q15ONE-g2;
+    for (i=0;i<overlap;i++)
+    {
+       opus_val32 diff;
+       opus_val16 g, w;
+       /* Squared window value: weight of g2 in the blend, with g1 getting the rest */
+       w = MULT16_16_Q15(window[i*inc], window[i*inc]);
+       g = SHR32(MAC16_16(MULT16_16(w,g2),
+             Q15ONE-w, g1), 15);
+       /* Half the left-right difference, scaled by the current removal gain */
+       diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1]));
+       diff = MULT16_16_Q15(g, diff);
+       out[i*channels] = out[i*channels] - diff;
+       out[i*channels+1] = out[i*channels+1] + diff;
+    }
+    /* Past the fade: constant removal gain g2 */
+    for (;i<frame_size;i++)
+    {
+       opus_val32 diff;
+       diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1]));
+       diff = MULT16_16_Q15(g2, diff);
+       out[i*channels] = out[i*channels] - diff;
+       out[i*channels+1] = out[i*channels+1] + diff;
+    }
+}

 OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int mode, int *error)

    int ret;

@@ -442,6 +472,19 @@

 #endif

+    if (st->silk_mode.toMono==1 && st->stream_channels==2)

+    {

+       /* In case the encoder changes its mind on stereo->mono transition */

+       st->silk_mode.toMono = -1;

+    } else if (st->stream_channels == 1 && st->prev_channels ==2 && !st->silk_mode.toMono)

+    {

+       /* Delay stereo->mono transition so that SILK can do a smooth downmix */

+       st->silk_mode.toMono=1;

+       st->stream_channels = 2;

+    } else {

+       st->silk_mode.toMono=0;

+    }

 #ifdef FUZZING

     /* Random mode switching */

     if ((rand()&0xF)==0)

@@ -830,26 +873,26 @@

         st->delay_buffer[i] = pcm_buf[(frame_size+delay_compensation-st->encoder_buffer)*st->channels+i];

-    if( st->mode == MODE_HYBRID && st->stream_channels == 2 ) {

+    if (st->mode != MODE_HYBRID || st->stream_channels==1)

+       st->silk_mode.stereoWidth_Q14 = 1<<14;

+    if( st->channels == 2 ) {

         /* Apply stereo width reduction (at low bitrates) */

         if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) {

-            int width_Q14, delta_Q14, nSamples_8ms, diff;

-            nSamples_8ms = ( st->Fs * 8 ) / 1000;

-            width_Q14 = (1 << 14 ) - st->hybrid_stereo_width_Q14;

-            delta_Q14 = ( st->hybrid_stereo_width_Q14 - st->silk_mode.stereoWidth_Q14 ) / nSamples_8ms;

-            for( i = 0; i < nSamples_8ms; i++ ) {

-                width_Q14 += delta_Q14;

-                diff = pcm_buf[ 2*i+1 ] - (opus_int32)pcm_buf[ 2*i ];

-                diff = ( diff * width_Q14 ) >> 15;

-                pcm_buf[ 2*i ]   = (opus_int16)( pcm_buf[ 2*i ]   + diff );

-                pcm_buf[ 2*i+1 ] = (opus_int16)( pcm_buf[ 2*i+1 ] - diff );

-            }

-            for( ; i < frame_size; i++ ) {

-                diff = pcm_buf[ 2*i+1 ] - (opus_int32)pcm_buf[ 2*i ];

-                diff = ( diff * width_Q14 ) >> 15;

-                pcm_buf[ 2*i ]   = (opus_int16)( pcm_buf[ 2*i ]   + diff );

-                pcm_buf[ 2*i+1 ] = (opus_int16)( pcm_buf[ 2*i+1 ] - diff );

-            }

+            opus_val16 g1, g2;
+            const CELTMode *celt_mode;
+            celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
+            /* Previous and new stereo width, both stored as Q14 integers */
+            g1 = st->hybrid_stereo_width_Q14;
+            g2 = st->silk_mode.stereoWidth_Q14;
+#ifdef FIXED_POINT
+            /* Integer build: double Q14 to the Q15 scale; full width (16384) is mapped
+             * to Q15ONE instead of being shifted */
+            g1 = g1==16384 ? Q15ONE : SHL16(g1,1);
+            g2 = g2==16384 ? Q15ONE : SHL16(g2,1);
+#else
+            /* Floating-point build: scale the Q14 value down so that 16384 becomes 1.0 */
+            g1 *= (1./16384);
+            g2 *= (1./16384);
+#endif
+            /* Fade the width from g1 to g2 in place over the CELT overlap window */
+            stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap,
+                  frame_size, st->channels, celt_mode->window, st->Fs);

             st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14;

@@ -944,6 +987,8 @@

         st->prev_mode = MODE_CELT_ONLY;

     else

         st->prev_mode = st->mode;

+    st->prev_channels = st->stream_channels;

     st->first = 0;

     RESTORE_STACK;

     return ret+1+redundancy_bytes;