shithub: opus

Download patch

ref: 3880c5abdd8765c510ee3262d3888df848deff0c
parent: 89c5e06d4bd5a5e41dda53478f9f706d5196fdef
author: Jesús de Vicente Peña <[email protected]>
date: Thu Feb 6 10:55:17 EST 2020

Reland "Fixes to the activity flag that is passed to Silk so it represents the final activity flag used in the DTX decision"

This flag was modified after calling the Silk encoder function. This commit corrects that behavior by introducing those modifications before calling the Silk encoder.

Slightly modified comments by Felicia Lim

Signed-off-by: Felicia Lim <[email protected]>

--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -892,34 +892,15 @@
 #endif
 
 /* Decides if DTX should be turned on (=1) or off (=0) */
-static int decide_dtx_mode(float activity_probability,    /* probability that current frame contains speech/music */
-                           int *nb_no_activity_frames,    /* number of consecutive frames with no activity */
-                           opus_val32 peak_signal_energy, /* peak energy of desired signal detected so far */
-                           const opus_val16 *pcm,         /* input pcm signal */
-                           int frame_size,                /* frame size */
-                           int channels,
-                           int is_silence,                 /* only digital silence detected in this frame */
-                           int arch
-                          )
-{
-   opus_val32 noise_energy;
+static int decide_dtx_mode(opus_int activity,            /* indicates if this frame contains speech/music */
+                           int *nb_no_activity_frames    /* number of consecutive frames with no activity */
+                           )
 
-   if (!is_silence)
+{
+   if (!activity)
    {
-      if (activity_probability < DTX_ACTIVITY_THRESHOLD)  /* is noise */
-      {
-         noise_energy = compute_frame_energy(pcm, frame_size, channels, arch);
-
-         /* but is sufficiently quiet */
-         is_silence = peak_signal_energy >= (PSEUDO_SNR_THRESHOLD * noise_energy);
-      }
-   }
-
-   if (is_silence)
-   {
       /* The number of consecutive DTX frames should be within the allowed bounds */
       (*nb_no_activity_frames)++;
-
       if (*nb_no_activity_frames > NB_SPEECH_FRAMES_BEFORE_DTX)
       {
          if (*nb_no_activity_frames <= (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX))
@@ -1102,6 +1083,8 @@
     int analysis_read_subframe_bak=-1;
     int is_silence = 0;
 #endif
+    opus_int activity = VAD_NO_DECISION;
+
     VARDECL(opus_val16, tmp_prefill);
 
     ALLOC_STACK;
@@ -1169,6 +1152,18 @@
     if (!is_silence)
       st->voice_ratio = -1;
 
+    if (analysis_info.valid) {
+       activity = !is_silence &&
+              analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD;
+       if (!activity) {
+          opus_val32 noise_energy = compute_frame_energy(pcm, frame_size, st->channels, st->arch);
+          /* mark as active if this noise frame is not sufficiently quiet */
+          activity = st->peak_signal_energy < (PSEUDO_SNR_THRESHOLD * noise_energy);
+       }
+    } else {
+       activity = !is_silence;
+    }
+
     st->detected_bandwidth = 0;
     if (analysis_info.valid)
     {
@@ -1668,7 +1663,6 @@
     if (st->mode != MODE_CELT_ONLY)
     {
         opus_int32 total_bitRate, celt_rate;
-        opus_int activity;
 #ifdef FIXED_POINT
        const opus_int16 *pcm_silk;
 #else
@@ -1676,14 +1670,6 @@
        ALLOC(pcm_silk, st->channels*frame_size, opus_int16);
 #endif
 
-        activity = VAD_NO_DECISION;
-#ifndef DISABLE_FLOAT_API
-        if( analysis_info.valid ) {
-            /* Inform SILK about the Opus VAD decision */
-            activity = ( analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD );
-        }
-#endif
-
         /* Distribute bits between SILK and CELT */
         total_bitRate = 8 * bytes_target * frame_rate;
         if( st->mode == MODE_HYBRID ) {
@@ -2144,8 +2130,7 @@
 #ifndef DISABLE_FLOAT_API
     if (st->use_dtx && (analysis_info.valid || is_silence))
     {
-       if (decide_dtx_mode(analysis_info.activity_probability, &st->nb_no_activity_frames,
-             st->peak_signal_energy, pcm, frame_size, st->channels, is_silence, st->arch))
+       if (decide_dtx_mode(activity, &st->nb_no_activity_frames))
        {
           st->rangeFinal = 0;
           data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);