shithub: opus

Download patch

ref: 6559d36a46cbc72ecb2c055a943ff1d35696533f
parent: f2d6480f90b47ff942b590f7a4194ba381029702
author: Timothy B. Terriberry <[email protected]>
date: Mon Oct 17 10:20:55 EDT 2011

Remove redundant mid-only flag when side VAD flag is set.

If there is activity in a regular, side SILK frame, then it must
 be coded, so we don't need to send a mid-only flag.

--- a/silk/dec_API.c
+++ b/silk/dec_API.c
@@ -212,7 +212,7 @@
         {
             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
             /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
-            if(   lostFlag == FLAG_DECODE_NORMAL ||
+            if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
                 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
             {
                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
--- a/silk/enc_API.c
+++ b/silk/enc_API.c
@@ -385,9 +385,30 @@
                     psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
                     MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
                     psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
+                if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
+                    /* Reset side channel encoder memory for first frame with side coding */
+                    if( psEnc->prev_decode_only_middle == 1 ) {
+                        silk_memset( &psEnc->state_Fxx[ 1 ].sShape,               0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
+                        silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt,             0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) );
+                        silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ,            0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
+                        silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15,   0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
+                        silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
+                        silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.inputBuf,        0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.inputBuf ) );
+                        psEnc->state_Fxx[ 1 ].sCmn.prevLag                = 100;
+                        psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev           = 100;
+                        psEnc->state_Fxx[ 1 ].sShape.LastGainIndex        = 10;
+                        psEnc->state_Fxx[ 1 ].sCmn.prevSignalType         = TYPE_NO_VOICE_ACTIVITY;
+                        psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_inv_gain_Q16 = 65536;
+                    }
+                    silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] );
+                } else {
+                    psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;
+                }
                 if( !prefillFlag ) {
                     silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
-                    silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
+                    if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
+                        silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
+                    }
                 }
             } else {
                 /* Buffering */
@@ -394,22 +415,8 @@
                 silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );
                 silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
             }
+            silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] );
 
-            /* Reset side channel encoder memory for first frame with side coding */
-            if( encControl->nChannelsInternal == 2 && psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 && psEnc->prev_decode_only_middle == 1 ) {
-                silk_memset( &psEnc->state_Fxx[ 1 ].sShape,               0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
-                silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt,             0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) );
-                silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ,            0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
-                silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15,   0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
-                silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
-                silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.inputBuf,        0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.inputBuf ) );
-                psEnc->state_Fxx[ 1 ].sCmn.prevLag                = 100;
-                psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev           = 100;
-                psEnc->state_Fxx[ 1 ].sShape.LastGainIndex        = 10;
-                psEnc->state_Fxx[ 1 ].sCmn.prevSignalType         = TYPE_NO_VOICE_ACTIVITY;
-                psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_inv_gain_Q16 = 65536;
-            }
-
             /* Encode */
             for( n = 0; n < encControl->nChannelsInternal; n++ ) {
                 if( encControl->nChannelsInternal == 1 ) {
@@ -436,8 +443,6 @@
                     if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding ) ) != 0 ) {
                         silk_assert( 0 );
                     }
-                } else {
-                    psEnc->state_Fxx[ n ].sCmn.VAD_flags[ psEnc->state_Fxx[ n ].sCmn.nFramesEncoded ] = 0;
                 }
                 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
                 psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0;
--- a/silk/fixed/encode_frame_FIX.c
+++ b/silk/fixed/encode_frame_FIX.c
@@ -40,38 +40,15 @@
     opus_int                         condCoding         /* I    The type of conditional coding used so far for this frame */
 );
 
-/****************/
-/* Encode frame */
-/****************/
-opus_int silk_encode_frame_FIX(
-    silk_encoder_state_FIX          *psEnc,             /* I/O  Encoder state FIX                       */
-    opus_int32                       *pnBytesOut,        /*   O  Number of payload bytes                 */
-    ec_enc                          *psRangeEnc,        /* I/O  compressor data structure               */
-    opus_int                         condCoding         /* I    The type of conditional coding to use   */
+void silk_encode_do_VAD_FIX(
+    silk_encoder_state_FIX          *psEnc              /* I/O  Encoder state FIX                       */
 )
 {
-    silk_encoder_control_FIX sEncCtrl;
-    opus_int     ret = 0;
-    opus_int16   *x_frame, *res_pitch_frame;
-    opus_int16   xfw[ MAX_FRAME_LENGTH ];
-    opus_int16   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
-
-TIC(ENCODE_FRAME)
-
-    psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
-
-    /**************************************************************/
-    /* Setup Input Pointers, and insert frame in input buffer    */
-    /*************************************************************/
-    /* pointers aligned with start of frame to encode */
-    x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */
-    res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */
-
     /****************************/
     /* Voice Activity Detection */
     /****************************/
 TIC(VAD)
-    ret = silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
+    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
 TOC(VAD)
 
     /**************************************************/
@@ -93,6 +70,34 @@
         psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
     }
+}
+
+/****************/
+/* Encode frame */
+/****************/
+opus_int silk_encode_frame_FIX(
+    silk_encoder_state_FIX          *psEnc,             /* I/O  Encoder state FIX                       */
+    opus_int32                       *pnBytesOut,        /*   O  Number of payload bytes                 */
+    ec_enc                          *psRangeEnc,        /* I/O  compressor data structure               */
+    opus_int                         condCoding         /* I    The type of conditional coding to use   */
+)
+{
+    silk_encoder_control_FIX sEncCtrl;
+    opus_int     ret = 0;
+    opus_int16   *x_frame, *res_pitch_frame;
+    opus_int16   xfw[ MAX_FRAME_LENGTH ];
+    opus_int16   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
+
+TIC(ENCODE_FRAME)
+
+    psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
+
+    /**************************************************************/
+    /* Setup Input Pointers, and insert frame in input buffer    */
+    /*************************************************************/
+    /* pointers aligned with start of frame to encode */
+    x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */
+    res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */
 
     /***************************************/
     /* Ensure smooth bandwidth transitions */
--- a/silk/fixed/main_FIX.h
+++ b/silk/fixed/main_FIX.h
@@ -44,6 +44,7 @@
 #endif
 
 #define silk_encoder_state_Fxx      silk_encoder_state_FIX
+#define silk_encode_do_VAD_Fxx      silk_encode_do_VAD_FIX
 #define silk_encode_frame_Fxx       silk_encode_frame_FIX
 
 /*********************/
@@ -53,6 +54,11 @@
 /* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
 void silk_HP_variable_cutoff(
     silk_encoder_state_Fxx          state_Fxx[]    /* I/O  Encoder states                              */
+);
+
+/* Voice activity detection: runs the VAD and sets the frame's signal type and VAD flag */
+void silk_encode_do_VAD_FIX(
+    silk_encoder_state_FIX          *psEnc              /* I/O  Pointer to Silk FIX encoder state       */
 );
 
 /* Encoder main function */
--- a/silk/float/encode_frame_FLP.c
+++ b/silk/float/encode_frame_FLP.c
@@ -40,38 +40,15 @@
     opus_int                         condCoding         /* I    The type of conditional coding used so far for this frame */
 );
 
-/****************/
-/* Encode frame */
-/****************/
-opus_int silk_encode_frame_FLP(
-    silk_encoder_state_FLP          *psEnc,             /* I/O  Encoder state FLP                       */
-    opus_int32                       *pnBytesOut,        /*   O  Number of payload bytes                 */
-    ec_enc                          *psRangeEnc,        /* I/O  compressor data structure               */
-    opus_int                         condCoding         /* I    The type of conditional coding to use   */
+void silk_encode_do_VAD_FLP(
+    silk_encoder_state_FLP          *psEnc              /* I/O  Encoder state FLP                       */
 )
 {
-    silk_encoder_control_FLP sEncCtrl;
-    opus_int     i, ret = 0;
-    silk_float   *x_frame, *res_pitch_frame;
-    silk_float   xfw[ MAX_FRAME_LENGTH ];
-    silk_float   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
-
-TIC(ENCODE_FRAME)
-
-    psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
-
-    /**************************************************************/
-    /* Setup Input Pointers, and insert frame in input buffer    */
-    /*************************************************************/
-    /* pointers aligned with start of frame to encode */
-    x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */
-    res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */
-
     /****************************/
     /* Voice Activity Detection */
     /****************************/
 TIC(VAD)
-    ret = silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
+    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
 TOC(VAD)
 
     /**************************************************/
@@ -96,6 +73,34 @@
         psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
     }
+}
+
+/****************/
+/* Encode frame */
+/****************/
+opus_int silk_encode_frame_FLP(
+    silk_encoder_state_FLP          *psEnc,             /* I/O  Encoder state FLP                       */
+    opus_int32                       *pnBytesOut,        /*   O  Number of payload bytes                 */
+    ec_enc                          *psRangeEnc,        /* I/O  compressor data structure               */
+    opus_int                         condCoding         /* I    The type of conditional coding to use   */
+)
+{
+    silk_encoder_control_FLP sEncCtrl;
+    opus_int     i, ret = 0;
+    silk_float   *x_frame, *res_pitch_frame;
+    silk_float   xfw[ MAX_FRAME_LENGTH ];
+    silk_float   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
+
+TIC(ENCODE_FRAME)
+
+    psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
+
+    /**************************************************************/
+    /* Setup Input Pointers, and insert frame in input buffer    */
+    /*************************************************************/
+    /* pointers aligned with start of frame to encode */
+    x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */
+    res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */
 
     /***************************************/
     /* Ensure smooth bandwidth transitions */
--- a/silk/float/main_FLP.h
+++ b/silk/float/main_FLP.h
@@ -42,6 +42,7 @@
 #endif
 
 #define silk_encoder_state_Fxx      silk_encoder_state_FLP
+#define silk_encode_do_VAD_Fxx      silk_encode_do_VAD_FLP
 #define silk_encode_frame_Fxx       silk_encode_frame_FLP
 
 /*********************/
@@ -51,6 +52,11 @@
 /* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
 void silk_HP_variable_cutoff(
     silk_encoder_state_Fxx          state_Fxx[]        /* I/O  Encoder states                          */
+);
+
+/* Voice activity detection: runs the VAD and sets the frame's signal type and VAD flag */
+void silk_encode_do_VAD_FLP(
+    silk_encoder_state_FLP          *psEnc              /* I/O  Encoder state FLP                       */
 );
 
 /* Encoder main function */