shithub: opus

Download patch

ref: 8299edfc0c34aaf91ef07bf2410ad15423bcaf96
parent: 693421ea238175e67211ce8a0be8d1db450a9698
author: Jean-Marc Valin <[email protected]>
date: Wed Jan 10 12:41:50 EST 2018

Scaling back the pitch filter when most of the energy is above 3.2 kHz

3.2 kHz corresponds to the fundamental frequency of the shortest pitch period allowed.

--- a/celt/celt.h
+++ b/celt/celt.h
@@ -63,6 +63,7 @@
    float music_prob_max;
    int   bandwidth;
    float activity_probability;
+   float max_pitch_ratio;
    /* Store as Q6 char to save space. */
    unsigned char leak_boost[LEAK_BANDS];
 } AnalysisInfo;
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -1109,7 +1109,7 @@
 
 
 static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N,
-      int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes)
+      int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes, AnalysisInfo *analysis)
 {
    int c;
    VARDECL(celt_sig, _pre);
@@ -1165,7 +1165,10 @@
       gain1 = 0;
       pitch_index = COMBFILTER_MINPERIOD;
    }
-
+#ifndef DISABLE_FLOAT_API
+   if (analysis->valid)
+      gain1 *= analysis->max_pitch_ratio;
+#endif
    /* Gain threshold for enabling the prefilter/postfilter */
    pf_threshold = QCONST16(.2f,15);
 
@@ -1603,7 +1606,7 @@
             && st->complexity >= 5;
 
       prefilter_tapset = st->tapset_decision;
-      pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);
+      pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes, &st->analysis);
       if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
             && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
          pitch_change = 1;
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -446,6 +446,8 @@
     float leakage_from[NB_TBANDS+1];
     float leakage_to[NB_TBANDS+1];
     float layer_out[MAX_NEURONS];
+    float below_max_pitch;
+    float above_max_pitch;
     SAVE_STACK;
 
     alpha = 1.f/IMIN(10, 1+tonal->count);
@@ -722,6 +724,8 @@
     maxE = 0;
     noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
     noise_floor *= noise_floor;
+    below_max_pitch=0;
+    above_max_pitch=0;
     for (b=0;b<NB_TBANDS;b++)
     {
        float E=0;
@@ -738,6 +742,12 @@
        }
        E = SCALE_ENER(E);
        maxE = MAX32(maxE, E);
+       if (band_start < 64)
+       {
+          below_max_pitch += E;
+       } else {
+          above_max_pitch += E;
+       }
        tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
        Em = MAX32(E, tonal->meanE[b]);
        /* Consider the band "active" only if all these conditions are met:
@@ -767,6 +777,7 @@
        /* silk_resampler_down2_hp() shifted right by an extra 8 bits. */
        E *= 256.f*(1.f/Q15ONE)*(1.f/Q15ONE);
 #endif
+       above_max_pitch += E;
        tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
        Em = MAX32(E, tonal->meanE[b]);
        if (Em > 3*noise_ratio*noise_floor*160 || E > noise_ratio*noise_floor*160)
@@ -774,6 +785,10 @@
        /* Check if the band is masked (see below). */
        is_masked[b] = E < (tonal->prev_bandwidth == 20  ? .01f : .05f)*bandwidth_mask;
     }
+    if (above_max_pitch > below_max_pitch)
+       info->max_pitch_ratio = below_max_pitch/above_max_pitch;
+    else
+       info->max_pitch_ratio = 1;
     /* In some cases, resampling aliasing can create a small amount of energy in the first band
        being cut. So if the last band is masked, we don't include it.  */
     if (bandwidth == 20 && is_masked[NB_TBANDS])