ref: 36481346729962928b58672a86bef140b703787a
parent: 425e8a9bdab4065e8176198738e9caf046a03240
author: Felicia Lim <[email protected]>
date: Mon May 16 11:29:53 EDT 2016
New DTX that works in all modes (SILK/CELT/HYBRID). A frame is marked as valid for DTX if it contains noise or only digital silence. As before, there is an overhang period of 200 ms and a maximum consecutive DTX period of 400 ms. If the new DTX cannot be used because of the complexity setting and sampling frequency chosen, the SILK DTX will be used instead. Signed-off-by: Jean-Marc Valin <[email protected]>
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -57,7 +57,8 @@
float noisiness;
float activity;
float music_prob;
- int bandwidth;
+ int bandwidth;
+ float activity_probability;
} AnalysisInfo;
typedef struct {
--- a/silk/define.h
+++ b/silk/define.h
@@ -56,6 +56,7 @@
/* DTX settings */
#define NB_SPEECH_FRAMES_BEFORE_DTX 10 /* eq 200 ms */
#define MAX_CONSECUTIVE_DTX 20 /* eq 400 ms */
+#define DTX_ACTIVITY_THRESHOLD 0.1f
/* Maximum sampling frequency */
#define MAX_FS_KHZ 16
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -524,6 +524,9 @@
/* Consider that silence has a 50-50 probability. */
frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f;
+ /* Probability of speech or music vs noise */
+ info->activity_probability = frame_probs[1];
+
/*printf("%f %f\n", frame_probs[0], frame_probs[1]);*/
{
/* Probability of state transition */
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -84,6 +84,7 @@
int arch;
#ifndef DISABLE_FLOAT_API
TonalityAnalysisState analysis;
+ int use_dtx; /* general DTX for both SILK and CELT */
#endif
#define OPUS_ENCODER_RESET_START stream_channels
@@ -105,6 +106,7 @@
opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2];
#ifndef DISABLE_FLOAT_API
int detected_bandwidth;
+ int nb_no_activity_frames;
#endif
opus_uint32 rangeFinal;
};
@@ -1026,6 +1028,38 @@
return silence;
}
+/* Decides if DTX should be turned on (=1) or off (=0) for the current frame.
+ *
+ * A frame is inactive when is_silence is set, or when activity_probability
+ * is below DTX_ACTIVITY_THRESHOLD. *nb_no_activity_frames counts consecutive
+ * inactive frames and is updated in place on every call; any active frame
+ * resets it to 0.
+ *
+ * Returns 1 only once the count exceeds NB_SPEECH_FRAMES_BEFORE_DTX (the
+ * overhang) and for at most MAX_CONSECUTIVE_DTX frames in a row. When that
+ * limit is passed, the count is set back to NB_SPEECH_FRAMES_BEFORE_DTX and
+ * 0 is returned for this frame. Returns 0 in every other case. */
+static int decide_dtx_mode(float activity_probability, /* probability that current frame contains speech/music */
+ int *nb_no_activity_frames, /* number of consecutive frames with no activity (read and written) */
+ int is_silence /* only digital silence detected in this frame */
+ )
+{
+ int is_noise = 0;
+
+ if (!is_silence)
+ {
+ is_noise = activity_probability < DTX_ACTIVITY_THRESHOLD; /* low speech/music probability is treated as noise */
+ }
+
+ if (is_silence || is_noise)
+ {
+ /* Count this inactive frame. The number of consecutive DTX frames should be
+ * within the allowed bounds, which the checks below enforce. */
+ (*nb_no_activity_frames)++;
+
+ if (*nb_no_activity_frames > NB_SPEECH_FRAMES_BEFORE_DTX)
+ {
+ if (*nb_no_activity_frames <= (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX))
+ /* Valid frame for DTX! */
+ return 1;
+ else
+ (*nb_no_activity_frames) = NB_SPEECH_FRAMES_BEFORE_DTX; /* DTX run reached its maximum: return 0 for this frame; the next inactive frame exceeds the overhang again */
+ }
+ } else
+ (*nb_no_activity_frames) = 0; /* activity detected: restart the no-activity count */
+
+ return 0;
+}
+
#endif
opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
@@ -1289,7 +1323,10 @@
/* When FEC is enabled and there's enough packet loss, use SILK */
if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4)
st->mode = MODE_SILK_ONLY;
- /* When encoding voice and DTX is enabled, set the encoder to SILK mode (at least for now) */
+ /* When encoding voice and DTX is enabled but the generalized DTX cannot be used,
+ because of complexity and sampling frequency settings,
+ set the encoder to SILK mode so that the SILK DTX can be used */
+ st->silk_mode.useDTX = st->use_dtx && !(analysis_info.valid || is_silence);
if (st->silk_mode.useDTX && voice_est > 100)
st->mode = MODE_SILK_ONLY;
#endif
@@ -2054,6 +2091,20 @@
st->first = 0;
+ /* DTX decision */
+#ifndef DISABLE_FLOAT_API
+ if (st->use_dtx && (analysis_info.valid || is_silence))
+ {
+ if (decide_dtx_mode(analysis_info.activity_probability, &st->nb_no_activity_frames, is_silence))
+ {
+ st->rangeFinal = 0;
+ data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
+ RESTORE_STACK;
+ return 1;
+ }
+ }
+#endif
+
/* In the unlikely case that the SILK encoder busted its target, tell
the decoder to call the PLC */
if (ec_tell(&enc) > (max_data_bytes-1)*8)
@@ -2330,7 +2381,7 @@
{
goto bad_arg;
}
- st->silk_mode.useDTX = value;
+ st->use_dtx = value;
}
break;
case OPUS_GET_DTX_REQUEST:
@@ -2340,7 +2391,7 @@
{
goto bad_arg;
}
- *value = st->silk_mode.useDTX;
+ *value = st->use_dtx;
}
break;
case OPUS_SET_COMPLEXITY_REQUEST: