shithub: opus

Download patch

ref: 0892c169c6b868c4a49d679f5379e517311d229e
parent: ac2e623d251bc336ca1d401b95ee47e6ffef0c51
author: Jean-Marc Valin <[email protected]>
date: Wed Jan 11 22:44:49 EST 2012

Tonality and pitch tuning

Tuned the tonality estimator to trigger on signals where only part of the
spectrum is tonal. Also tuned the pitch detector not to be confused
by short-term correlation.

--- a/celt/celt.c
+++ b/celt/celt.c
@@ -442,7 +442,7 @@
 #ifdef FUZZING
    is_transient = rand()&0x1;
 #endif
-   /*printf("%d %d %d %f %f\n", is_transient, *tf_estimate, tf_max, 0., 1.);*/
+   /*printf("%d %f %d %f %f ", is_transient, *tf_estimate, tf_max, analysis->tonality, analysis->noisiness);*/
    return is_transient;
 }
 
@@ -1206,8 +1206,10 @@
          ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
 
          pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
+         /* Don't search the last 1.5 octaves of the range because
+            there are too many false positives due to short-term correlation */
          pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
-               COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index);
+               COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index);
          pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
 
          gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
@@ -1619,11 +1621,11 @@
      if (st->analysis.valid) {
         int tonal_target;
         float tonal;
-        tonal = MAX16(0,st->analysis.tonality-.2)*(.5+st->analysis.tonality);
-        tonal_target = target + (coded_bins<<BITRES)*1.6f*tonal;
+        tonal = MAX16(0,st->analysis.tonality-.2);
+        tonal_target = new_target + (coded_bins<<BITRES)*2.0f*tonal;
         if (pitch_change)
            tonal_target +=  (coded_bins<<BITRES)*.8;
-        /*printf("%f %d\n", tonal, tonal_target);*/
+        /*printf("%f %f ", st->analysis.tonality, tonal);*/
         new_target = IMAX(tonal_target,new_target);
      }
 #endif
--- a/celt/pitch.c
+++ b/celt/pitch.c
@@ -331,6 +331,7 @@
       int T1, T1b;
       opus_val16 g1;
       opus_val16 cont=0;
+      opus_val16 thresh;
       T1 = (2*T0+k)/(2*k);
       if (T1 < minperiod)
          break;
@@ -372,7 +373,14 @@
          cont = HALF32(prev_gain);
       else
          cont = 0;
-      if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont)
+      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7,15),g0)-cont);
+      /* Bias against very high pitch (very short period) to avoid false-positives
+         due to short-term correlation */
+      if (T1<3*minperiod)
+         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85,15),g0)-cont);
+      else if (T1<2*minperiod)
+         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9,15),g0)-cont);
+      if (g1 > thresh)
       {
          best_xy = xy;
          best_yy = yy;
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -74,7 +74,7 @@
       .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
 };
 
-#define NB_TONAL_SKIP_BANDS 0
+#define NB_TONAL_SKIP_BANDS 9
 
 typedef struct {
    float angle[240];
@@ -265,8 +265,7 @@
        frame_stationarity += stationarity;
        /*band_tonality[b] = tE/(1e-15+E)*/;
        band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
-       //printf("%f ", band_tonality[b]);
-#if 1
+#if 0
        if (b>=NB_TONAL_SKIP_BANDS)
        {
           frame_tonality += tweight[b]*band_tonality[b];
@@ -277,7 +276,7 @@
        if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
           frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
 #endif
-       max_frame_tonality = MAX16(max_frame_tonality, frame_tonality);
+       max_frame_tonality = MAX16(max_frame_tonality, (1+.03*(b-NB_TBANDS))*frame_tonality);
        slope += band_tonality[b]*(b-8);
        /*printf("%f %f ", band_tonality[b], stationarity);*/
        if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1)
@@ -295,7 +294,7 @@
        }
        tonal->prev_band_tonality[b] = band_tonality[b];
     }
-    //printf("\n");
+
     frame_loudness = 20*log10(frame_loudness);
     tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness);
     tonal->lowECount *= (1-alphaE);
@@ -320,7 +319,7 @@
 #else
     info->activity = .5*(1+frame_noisiness-frame_stationarity);
 #endif
-    frame_tonality = (max_frame_tonality/(tw_sum));
+    frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
     frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8);
     tonal->prev_tonality = frame_tonality;
     info->boost_amount[0] -= frame_tonality+.2;