shithub: opus

Download patch

ref: bfabfd383dee7784aff30c4ed0343b54dc7cbabd
parent: 4155a403473ca3f33d088496fdd302067e5fa9d1
author: Jean-Marc Valin <[email protected]>
date: Mon Jul 23 11:24:40 EDT 2012

Fixes issues with stereo saving and dynalloc

1) Stereo saving was being too aggressive because it only considered the LF
   and because the savings were multiplied by coded_bins rather than by
   just the number of "side bins" below the intensity threshold
2) In the case of bandlimited signals, dynalloc would allocate way too many
   bits to the last non-zero band. We now explicitly check for the last
   band with a meaningful signal.

These issues were really obvious when encoding the decoded test01.mp3
because of the strong inter-channel correlation and (especially) the 16 kHz
lowpass used by the mp3 encoder.

--- a/celt/celt.c
+++ b/celt/celt.c
@@ -822,7 +822,8 @@
 
 static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
       const opus_val16 *bandLogE, int end, int LM, int C, int N0,
-      AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate)
+      AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
+      int intensity)
 {
    int i;
    opus_val32 diff=0;
@@ -829,10 +830,11 @@
    int c;
    int trim_index = 5;
    opus_val16 trim = QCONST16(5.f, 8);
-   opus_val16 logXC;
+   opus_val16 logXC, logXC2;
    if (C==2)
    {
       opus_val16 sum = 0; /* Q10 */
+      opus_val16 minXC; /* Q10 */
       /* Compute inter-channel correlation for low frequencies */
       for (i=0;i<8;i++)
       {
@@ -843,6 +845,15 @@
          sum = ADD16(sum, EXTRACT16(SHR32(partial, 18)));
       }
       sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);
+      minXC = sum;
+      for (i=8;i<intensity;i++)
+      {
+         int j;
+         opus_val32 partial = 0;
+         for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
+            partial = MAC16_16(partial, X[j], X[N0+j]);
+         minXC = MIN16(minXC, EXTRACT16(SHR32(partial, 18)));
+      }
       /*printf ("%f\n", sum);*/
       if (sum > QCONST16(.995f,10))
          trim_index-=4;
@@ -852,14 +863,18 @@
          trim_index-=2;
       else if (sum > QCONST16(.8f,10))
          trim_index-=1;
+      /* mid-side savings estimations based on the LF average*/
       logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum));
+      /* mid-side savings estimations based on min correlation */
+      logXC2 = MAX16(HALF16(logXC), celt_log2(QCONST32(1.001f, 20)-MULT16_16(minXC, minXC)));
 #ifdef FIXED_POINT
       /* Compensate for Q20 vs Q14 input and convert output to Q8 */
       logXC = PSHR32(logXC-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
+      logXC2 = PSHR32(logXC2-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
 #endif
 
       trim += MAX16(-QCONST16(4.f, 8), MULT16_16_Q15(QCONST16(.75f,15),logXC));
-      *stereo_saving = MULT16_16_Q15(-QCONST16(0.25f, 15), logXC);
+      *stereo_saving = -HALF16(logXC2);
    }
 
    /* Estimate spectral tilt */
@@ -1429,6 +1444,7 @@
    /* Make sure that dynamic allocation can't make us bust the budget */
    if (effectiveBytes > 50 && LM>=1)
    {
+      int last=0;
       VARDECL(opus_val16, follower);
       ALLOC(follower, C*st->mode->nbEBands, opus_val16);
       c=0;do
@@ -1435,8 +1451,15 @@
       {
          follower[c*st->mode->nbEBands] = bandLogE2[c*st->mode->nbEBands];
          for (i=1;i<st->end;i++)
+         {
+            /* The last band to be at least 3 dB higher than the previous one
+               is the last we'll consider. Otherwise, we run into problems on
+               bandlimited signals. */
+            if (bandLogE2[c*st->mode->nbEBands+i] > bandLogE2[c*st->mode->nbEBands+i-1]+QCONST16(.5f,DB_SHIFT))
+               last=i;
             follower[c*st->mode->nbEBands+i] = MIN16(follower[c*st->mode->nbEBands+i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*st->mode->nbEBands+i]);
-         for (i=effEnd-2;i>=0;i--)
+         }
+         for (i=last-1;i>=0;i--)
             follower[c*st->mode->nbEBands+i] = MIN16(follower[c*st->mode->nbEBands+i], MIN16(follower[c*st->mode->nbEBands+i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*st->mode->nbEBands+i]));
          for (i=0;i<st->end;i++)
          {
@@ -1537,14 +1560,6 @@
          dynalloc_logp = IMAX(2, dynalloc_logp-1);
       offsets[i] = boost;
    }
-   alloc_trim = 5;
-   if (tell+(6<<BITRES) <= total_bits - total_boost)
-   {
-      alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE,
-            st->end, LM, C, N, &st->analysis, &stereo_saving, tf_estimate);
-      ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
-      tell = ec_tell_frac(enc);
-   }
 
    if (C==2)
    {
@@ -1576,6 +1591,15 @@
       intensity = IMIN(st->end,IMAX(st->start, intensity));
    }
 
+   alloc_trim = 5;
+   if (tell+(6<<BITRES) <= total_bits - total_boost)
+   {
+      alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE,
+            st->end, LM, C, N, &st->analysis, &stereo_saving, tf_estimate, intensity);
+      ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
+      tell = ec_tell_frac(enc);
+   }
+
    /* Variable bitrate */
    if (vbr_rate>0)
    {
@@ -1599,7 +1623,7 @@
      if (st->constrained_vbr)
         target += (st->vbr_offset>>lm_diff);
 
-     /*printf("%f %f %f\n", st->analysis.activity, st->analysis.tonality, tf_estimate);*/
+     /*printf("%f %f %f %f ", st->analysis.activity, st->analysis.tonality, tf_estimate, stereo_saving);*/
 #ifndef FIXED_POINT
      if (st->analysis.valid && st->analysis.activity<.4)
         target -= (coded_bins<<BITRES)*1*(.4-st->analysis.activity);
@@ -1606,8 +1630,13 @@
 #endif
      if (C==2)
      {
-        target -= MIN32(target/3, SHR16(MULT16_16(stereo_saving,(st->mode->eBands[intensity]<<LM<<BITRES)),8));
-        target += MULT16_16_Q15(QCONST16(0.05f,15),coded_bins<<BITRES);
+        int coded_stereo_bands;
+        int coded_stereo_dof;
+        coded_stereo_bands = IMIN(intensity, coded_bands);
+        coded_stereo_dof = (st->mode->eBands[coded_stereo_bands]<<LM)-coded_stereo_bands;
+        /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/
+        target -= MIN32(target/3, SHR16(MULT16_16(stereo_saving,(coded_stereo_dof<<BITRES)),8));
+        target += MULT16_16_Q15(QCONST16(0.035,15),coded_stereo_dof<<BITRES);
      }
      /* Compensates for the average tonality boost */
      target -= MULT16_16_Q15(QCONST16(0.13f,15),coded_bins<<BITRES);