shithub: opus

--- a/silk/dec_API.c

+++ b/silk/dec_API.c

@@ -212,7 +212,7 @@

             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );

             /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */

-            if(   lostFlag == FLAG_DECODE_NORMAL ||

+            if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||

                 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )

                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );

--- a/silk/enc_API.c

+++ b/silk/enc_API.c

@@ -385,9 +385,30 @@

                     psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],

                     MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,

                     psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );

+                if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {

+                    /* Reset side channel encoder memory for first frame with side coding */

+                    if( psEnc->prev_decode_only_middle == 1 ) {

+                        silk_memset( &psEnc->state_Fxx[ 1 ].sShape,               0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );

+                        silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt,             0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) );

+                        silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ,            0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );

+                        silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15,   0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );

+                        silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );

+                        silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.inputBuf,        0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.inputBuf ) );

+                        psEnc->state_Fxx[ 1 ].sCmn.prevLag                = 100;

+                        psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev           = 100;

+                        psEnc->state_Fxx[ 1 ].sShape.LastGainIndex        = 10;

+                        psEnc->state_Fxx[ 1 ].sCmn.prevSignalType         = TYPE_NO_VOICE_ACTIVITY;

+                        psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_inv_gain_Q16 = 65536;

+                    }

+                    silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] );

+                } else {

+                    psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;

+                }

                 if( !prefillFlag ) {

                     silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );

-                    silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );

+                    if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {

+                        silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );

+                    }

             } else {

                 /* Buffering */

@@ -394,22 +415,8 @@

                 silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );

                 silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );

+            silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] );

-            /* Reset side channel encoder memory for first frame with side coding */

-            if( encControl->nChannelsInternal == 2 && psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 && psEnc->prev_decode_only_middle == 1 ) {

-                silk_memset( &psEnc->state_Fxx[ 1 ].sShape,               0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );

-                silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt,             0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) );

-                silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ,            0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );

-                silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15,   0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );

-                silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );

-                silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.inputBuf,        0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.inputBuf ) );

-                psEnc->state_Fxx[ 1 ].sCmn.prevLag                = 100;

-                psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev           = 100;

-                psEnc->state_Fxx[ 1 ].sShape.LastGainIndex        = 10;

-                psEnc->state_Fxx[ 1 ].sCmn.prevSignalType         = TYPE_NO_VOICE_ACTIVITY;

-                psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_inv_gain_Q16 = 65536;

-            }

             /* Encode */

             for( n = 0; n < encControl->nChannelsInternal; n++ ) {

                 if( encControl->nChannelsInternal == 1 ) {

@@ -436,8 +443,6 @@

                     if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding ) ) != 0 ) {

                         silk_assert( 0 );

-                } else {

-                    psEnc->state_Fxx[ n ].sCmn.VAD_flags[ psEnc->state_Fxx[ n ].sCmn.nFramesEncoded ] = 0;

                 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;

                 psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0;

--- a/silk/fixed/encode_frame_FIX.c

+++ b/silk/fixed/encode_frame_FIX.c

@@ -40,38 +40,15 @@

     opus_int                         condCoding         /* I    The type of conditional coding used so far for this frame */

);

-/****************/

-/* Encode frame */

-/****************/

-opus_int silk_encode_frame_FIX(

-    silk_encoder_state_FIX          *psEnc,             /* I/O  Encoder state FIX                       */

-    opus_int32                       *pnBytesOut,        /*   O  Number of payload bytes                 */

-    ec_enc                          *psRangeEnc,        /* I/O  compressor data structure               */

-    opus_int                         condCoding         /* I    The type of conditional coding to use   */

+void silk_encode_do_VAD_FIX(

+    silk_encoder_state_FIX          *psEnc              /* I/O  Encoder state FIX                       */

-    silk_encoder_control_FIX sEncCtrl;

-    opus_int     ret = 0;

-    opus_int16   *x_frame, *res_pitch_frame;

-    opus_int16   xfw[ MAX_FRAME_LENGTH ];

-    opus_int16   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];

-TIC(ENCODE_FRAME)

-    psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;

-    /**************************************************************/

-    /* Setup Input Pointers, and insert frame in input buffer    */

-    /*************************************************************/

-    /* pointers aligned with start of frame to encode */

-    x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */

-    res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */

     /****************************/

     /* Voice Activity Detection */

     /****************************/

 TIC(VAD)

-    ret = silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );

+    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );

 TOC(VAD)

     /**************************************************/

@@ -93,6 +70,34 @@

         psEnc->sCmn.indices.signalType = TYPE_UNVOICED;

         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;

+}

+/****************/

+/* Encode frame */

+/****************/

+opus_int silk_encode_frame_FIX(

+    silk_encoder_state_FIX          *psEnc,             /* I/O  Encoder state FIX                       */

+    opus_int32                       *pnBytesOut,        /*   O  Number of payload bytes                 */

+    ec_enc                          *psRangeEnc,        /* I/O  compressor data structure               */

+    opus_int                         condCoding         /* I    The type of conditional coding to use   */

+)

+{

+    silk_encoder_control_FIX sEncCtrl;

+    opus_int     ret = 0;

+    opus_int16   *x_frame, *res_pitch_frame;

+    opus_int16   xfw[ MAX_FRAME_LENGTH ];

+    opus_int16   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];

+TIC(ENCODE_FRAME)

+    psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;

+    /**************************************************************/

+    /* Setup Input Pointers, and insert frame in input buffer    */

+    /*************************************************************/

+    /* pointers aligned with start of frame to encode */

+    x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */

+    res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */

     /***************************************/

     /* Ensure smooth bandwidth transitions */

--- a/silk/fixed/main_FIX.h

+++ b/silk/fixed/main_FIX.h

@@ -44,6 +44,7 @@

 #endif

 #define silk_encoder_state_Fxx      silk_encoder_state_FIX

+#define silk_encode_do_VAD_Fxx      silk_encode_do_VAD_FIX

 #define silk_encode_frame_Fxx       silk_encode_frame_FIX

 /*********************/

@@ -53,6 +54,11 @@

 /* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */

 void silk_HP_variable_cutoff(

     silk_encoder_state_Fxx          state_Fxx[]    /* I/O  Encoder states                              */

+);

+/* Voice activity detection on the buffered input frame; called before the frame is encoded */

+void silk_encode_do_VAD_FIX(

+    silk_encoder_state_FIX          *psEnc              /* I/O  Pointer to Silk FIX encoder state       */

);

 /* Encoder main function */

--- a/silk/float/encode_frame_FLP.c

+++ b/silk/float/encode_frame_FLP.c

@@ -40,38 +40,15 @@

     opus_int                         condCoding         /* I    The type of conditional coding used so far for this frame */

);

-/****************/

-/* Encode frame */

-/****************/

-opus_int silk_encode_frame_FLP(

-    silk_encoder_state_FLP          *psEnc,             /* I/O  Encoder state FLP                       */

-    opus_int32                       *pnBytesOut,        /*   O  Number of payload bytes                 */

-    ec_enc                          *psRangeEnc,        /* I/O  compressor data structure               */

-    opus_int                         condCoding         /* I    The type of conditional coding to use   */

+void silk_encode_do_VAD_FLP(

+    silk_encoder_state_FLP          *psEnc              /* I/O  Encoder state FLP                       */

-    silk_encoder_control_FLP sEncCtrl;

-    opus_int     i, ret = 0;

-    silk_float   *x_frame, *res_pitch_frame;

-    silk_float   xfw[ MAX_FRAME_LENGTH ];

-    silk_float   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];

-TIC(ENCODE_FRAME)

-    psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;

-    /**************************************************************/

-    /* Setup Input Pointers, and insert frame in input buffer    */

-    /*************************************************************/

-    /* pointers aligned with start of frame to encode */

-    x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */

-    res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */

     /****************************/

     /* Voice Activity Detection */

     /****************************/

 TIC(VAD)

-    ret = silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );

+    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );

 TOC(VAD)

     /**************************************************/

@@ -96,6 +73,34 @@

         psEnc->sCmn.indices.signalType = TYPE_UNVOICED;

         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;

+}

+/****************/

+/* Encode frame */

+/****************/

+opus_int silk_encode_frame_FLP(

+    silk_encoder_state_FLP          *psEnc,             /* I/O  Encoder state FLP                       */

+    opus_int32                       *pnBytesOut,        /*   O  Number of payload bytes                 */

+    ec_enc                          *psRangeEnc,        /* I/O  compressor data structure               */

+    opus_int                         condCoding         /* I    The type of conditional coding to use   */

+)

+{

+    silk_encoder_control_FLP sEncCtrl;

+    opus_int     i, ret = 0;

+    silk_float   *x_frame, *res_pitch_frame;

+    silk_float   xfw[ MAX_FRAME_LENGTH ];

+    silk_float   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];

+TIC(ENCODE_FRAME)

+    psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;

+    /**************************************************************/

+    /* Setup Input Pointers, and insert frame in input buffer    */

+    /*************************************************************/

+    /* pointers aligned with start of frame to encode */

+    x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */

+    res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */

     /***************************************/

     /* Ensure smooth bandwidth transitions */

--- a/silk/float/main_FLP.h

+++ b/silk/float/main_FLP.h

@@ -42,6 +42,7 @@

 #endif

 #define silk_encoder_state_Fxx      silk_encoder_state_FLP

+#define silk_encode_do_VAD_Fxx      silk_encode_do_VAD_FLP

 #define silk_encode_frame_Fxx       silk_encode_frame_FLP

 /*********************/

@@ -51,6 +52,11 @@

 /* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */

 void silk_HP_variable_cutoff(

     silk_encoder_state_Fxx          state_Fxx[]        /* I/O  Encoder states                          */

+);

+/* Voice activity detection on the buffered input frame; called before the frame is encoded */

+void silk_encode_do_VAD_FLP(

+    silk_encoder_state_FLP          *psEnc              /* I/O  Encoder state FLP                       */

);

 /* Encoder main function */