ref: 6559d36a46cbc72ecb2c055a943ff1d35696533f
parent: f2d6480f90b47ff942b590f7a4194ba381029702
author: Timothy B. Terriberry <[email protected]>
date: Mon Oct 17 10:20:55 EDT 2011
Remove redundant mid-only flag when side VAD flag is set. If there is activity in a regular, side SILK frame, then it must be coded, so we don't need to send a mid-only flag.
--- a/silk/dec_API.c
+++ b/silk/dec_API.c
@@ -212,7 +212,7 @@
{
silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
/* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
- if( lostFlag == FLAG_DECODE_NORMAL ||
+ if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
{
silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
--- a/silk/enc_API.c
+++ b/silk/enc_API.c
@@ -385,9 +385,30 @@
psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
+ if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
+ /* Reset side channel encoder memory for first frame with side coding */
+ if( psEnc->prev_decode_only_middle == 1 ) {
+ silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
+ silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt, 0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) );
+ silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
+ silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
+ silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
+ silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.inputBuf, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.inputBuf ) );
+ psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100;
+ psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100;
+ psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10;
+ psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY;
+ psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_inv_gain_Q16 = 65536;
+ }
+ silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] );
+ } else {
+ psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;
+ }
if( !prefillFlag ) {
silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
- silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
+ if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
+ silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
+ }
}
} else {
/* Buffering */
@@ -394,22 +415,8 @@
silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );
silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
}
+ silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] );
- /* Reset side channel encoder memory for first frame with side coding */
- if( encControl->nChannelsInternal == 2 && psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 && psEnc->prev_decode_only_middle == 1 ) {
- silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
- silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt, 0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) );
- silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
- silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
- silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
- silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.inputBuf, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.inputBuf ) );
- psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100;
- psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100;
- psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10;
- psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY;
- psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_inv_gain_Q16 = 65536;
- }
-
/* Encode */
for( n = 0; n < encControl->nChannelsInternal; n++ ) {
if( encControl->nChannelsInternal == 1 ) {
@@ -436,8 +443,6 @@
if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding ) ) != 0 ) {
silk_assert( 0 );
}
- } else {
- psEnc->state_Fxx[ n ].sCmn.VAD_flags[ psEnc->state_Fxx[ n ].sCmn.nFramesEncoded ] = 0;
}
psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0;
--- a/silk/fixed/encode_frame_FIX.c
+++ b/silk/fixed/encode_frame_FIX.c
@@ -40,38 +40,15 @@
opus_int condCoding /* I The type of conditional coding used so far for this frame */
);
-/****************/
-/* Encode frame */
-/****************/
-opus_int silk_encode_frame_FIX(
- silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */
- opus_int32 *pnBytesOut, /* O Number of payload bytes */
- ec_enc *psRangeEnc, /* I/O compressor data structure */
- opus_int condCoding /* I The type of conditional coding to use */
+void silk_encode_do_VAD_FIX(
+ silk_encoder_state_FIX *psEnc /* I/O Encoder state FIX */
)
{
- silk_encoder_control_FIX sEncCtrl;
- opus_int ret = 0;
- opus_int16 *x_frame, *res_pitch_frame;
- opus_int16 xfw[ MAX_FRAME_LENGTH ];
- opus_int16 res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
-
-TIC(ENCODE_FRAME)
-
- psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
-
- /**************************************************************/
- /* Setup Input Pointers, and insert frame in input buffer */
- /*************************************************************/
- /* pointers aligned with start of frame to encode */
- x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; /* start of frame to encode */
- res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; /* start of pitch LPC residual frame */
-
/****************************/
/* Voice Activity Detection */
/****************************/
TIC(VAD)
- ret = silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
+ silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
TOC(VAD)
/**************************************************/
@@ -93,6 +70,34 @@
psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
}
+}
+
+/****************/
+/* Encode frame */
+/****************/
+opus_int silk_encode_frame_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */
+ opus_int32 *pnBytesOut, /* O Number of payload bytes */
+ ec_enc *psRangeEnc, /* I/O compressor data structure */
+ opus_int condCoding /* I The type of conditional coding to use */
+)
+{
+ silk_encoder_control_FIX sEncCtrl;
+ opus_int ret = 0;
+ opus_int16 *x_frame, *res_pitch_frame;
+ opus_int16 xfw[ MAX_FRAME_LENGTH ];
+ opus_int16 res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
+
+TIC(ENCODE_FRAME)
+
+ psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
+
+ /**************************************************************/
+ /* Setup Input Pointers, and insert frame in input buffer */
+ /*************************************************************/
+ /* pointers aligned with start of frame to encode */
+ x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; /* start of frame to encode */
+ res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; /* start of pitch LPC residual frame */
/***************************************/
/* Ensure smooth bandwidth transitions */
--- a/silk/fixed/main_FIX.h
+++ b/silk/fixed/main_FIX.h
@@ -44,6 +44,7 @@
#endif
#define silk_encoder_state_Fxx silk_encoder_state_FIX
+#define silk_encode_do_VAD_Fxx silk_encode_do_VAD_FIX
#define silk_encode_frame_Fxx silk_encode_frame_FIX
/*********************/
@@ -53,6 +54,11 @@
/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
void silk_HP_variable_cutoff(
silk_encoder_state_Fxx state_Fxx[] /* I/O Encoder states */
+);
+
+/* Voice activity detection for one encoder channel */
+void silk_encode_do_VAD_FIX(
+ silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */
);
/* Encoder main function */
--- a/silk/float/encode_frame_FLP.c
+++ b/silk/float/encode_frame_FLP.c
@@ -40,38 +40,15 @@
opus_int condCoding /* I The type of conditional coding used so far for this frame */
);
-/****************/
-/* Encode frame */
-/****************/
-opus_int silk_encode_frame_FLP(
- silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
- opus_int32 *pnBytesOut, /* O Number of payload bytes */
- ec_enc *psRangeEnc, /* I/O compressor data structure */
- opus_int condCoding /* I The type of conditional coding to use */
+void silk_encode_do_VAD_FLP(
+ silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */
)
{
- silk_encoder_control_FLP sEncCtrl;
- opus_int i, ret = 0;
- silk_float *x_frame, *res_pitch_frame;
- silk_float xfw[ MAX_FRAME_LENGTH ];
- silk_float res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
-
-TIC(ENCODE_FRAME)
-
- psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
-
- /**************************************************************/
- /* Setup Input Pointers, and insert frame in input buffer */
- /*************************************************************/
- /* pointers aligned with start of frame to encode */
- x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; /* start of frame to encode */
- res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; /* start of pitch LPC residual frame */
-
/****************************/
/* Voice Activity Detection */
/****************************/
TIC(VAD)
- ret = silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
+ silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
TOC(VAD)
/**************************************************/
@@ -96,6 +73,34 @@
psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
}
+}
+
+/****************/
+/* Encode frame */
+/****************/
+opus_int silk_encode_frame_FLP(
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ opus_int32 *pnBytesOut, /* O Number of payload bytes */
+ ec_enc *psRangeEnc, /* I/O compressor data structure */
+ opus_int condCoding /* I The type of conditional coding to use */
+)
+{
+ silk_encoder_control_FLP sEncCtrl;
+ opus_int i, ret = 0;
+ silk_float *x_frame, *res_pitch_frame;
+ silk_float xfw[ MAX_FRAME_LENGTH ];
+ silk_float res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
+
+TIC(ENCODE_FRAME)
+
+ psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
+
+ /**************************************************************/
+ /* Setup Input Pointers, and insert frame in input buffer */
+ /*************************************************************/
+ /* pointers aligned with start of frame to encode */
+ x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; /* start of frame to encode */
+ res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; /* start of pitch LPC residual frame */
/***************************************/
/* Ensure smooth bandwidth transitions */
--- a/silk/float/main_FLP.h
+++ b/silk/float/main_FLP.h
@@ -42,6 +42,7 @@
#endif
#define silk_encoder_state_Fxx silk_encoder_state_FLP
+#define silk_encode_do_VAD_Fxx silk_encode_do_VAD_FLP
#define silk_encode_frame_Fxx silk_encode_frame_FLP
/*********************/
@@ -51,6 +52,11 @@
/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
void silk_HP_variable_cutoff(
silk_encoder_state_Fxx state_Fxx[] /* I/O Encoder states */
+);
+
+/* Voice activity detection for one encoder channel */
+void silk_encode_do_VAD_FLP(
+ silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */
);
/* Encoder main function */