shithub: opus

Download patch

ref: a396e153b94cc9bcfd2c008de6448d52fd376407
parent: 4b000c37e75b85e265fa8645389c6057d2c88ada
author: Timothy B. Terriberry <[email protected]>
date: Tue Jan 25 18:05:04 EST 2011

More anti-collapse fixes, as well as a fold fix.

This changes folding so that the LCG is never used on transients
 (either short blocks or long blocks with increased time
 resolution), except in the case that there's not enough decoded
 spectrum to fold yet.

It also now subtracts the anti-collapse bit from the total
 allocation in quant_all_bands() only when space has actually been
 reserved for it.

Finally, it cleans up some of the fill and collapse_mask tracking.
This tracking was originally made intentionally sloppy to save
 work, but at the last minute it was converted to replace the
 existing fill flag, so the sloppiness can have a number of logical
 implications.
The changes, in particular, are:
1) Splits of less than a block now correctly mark the second half
    as filled only if the whole block was filled (previously it
    would also mark it filled if the next block was filled).
2) Splits of less than a block now correctly mark a block as
    un-collapsed if either half was un-collapsed, instead of marking
    the next block as un-collapsed when the high half was.
3) The N=2 stereo special case now keeps its fill mask even when
    itheta==16384; previously this would have gotten cleared,
    despite the fact that we fold into the side in this case.
4) The test against fill for folding now only considers the bits
    corresponding to the current set of blocks.
   Previously it would still fold if any later block was filled.
5) The collapse mask used for the LCG fold data is now correctly
    initialized when B=16 on platforms with a 16-bit int.
6) The high bits on a collapse mask are now cleared after the TF
    resolution changes and interleaving at level 0, instead of
    waiting until the very end.
   This prevents extraneous high flags set on mid from being mixed
    into the side flags for mid-side stereo.

--- a/libcelt/bands.c
+++ b/libcelt/bands.c
@@ -737,7 +737,7 @@
          split = 1;
          LM -= 1;
          if (B==1)
-            fill |= fill<<1;
+            fill = fill&1|fill<<1;
          B = (B+1)>>1;
       }
    }
@@ -749,6 +749,7 @@
       int mbits, sbits, delta;
       int qalloc;
       int offset;
+      int orig_fill;
       celt_int32 tell;
 
       /* Decide on the resolution to give to the split parameter theta */
@@ -869,6 +870,7 @@
                - tell;
       b -= qalloc;
 
+      orig_fill = fill;
       if (itheta == 0)
       {
          imid = 32767;
@@ -928,7 +930,9 @@
             }
          }
          sign = 1-2*sign;
-         cm = quant_band(encode, m, i, x2, NULL, N, mbits, spread, B, intensity, tf_change, lowband, resynth, ec, remaining_bits, LM, lowband_out, NULL, level, seed, gain, lowband_scratch, fill);
+         /* We use orig_fill here because we want to fold the side, but if
+             itheta==16384, we'll have cleared the low bits of fill. */
+         cm = quant_band(encode, m, i, x2, NULL, N, mbits, spread, B, intensity, tf_change, lowband, resynth, ec, remaining_bits, LM, lowband_out, NULL, level, seed, gain, lowband_scratch, orig_fill);
          /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
              and there's no need to worry about mixing with the other channel. */
          y2[0] = -sign*x2[1];
@@ -986,7 +990,7 @@
              folding will be done to the side. */
          cm |= quant_band(encode, m, i, Y, NULL, N, sbits, spread, B, intensity, tf_change,
                next_lowband2, resynth, ec, remaining_bits, LM, NULL,
-               NULL, next_level, seed, MULT16_16_P15(gain,side), NULL, fill>>B)<<B;
+               NULL, next_level, seed, MULT16_16_P15(gain,side), NULL, fill>>B)<<(B0>>1&stereo-1);
       }
 
    } else {
@@ -1018,12 +1022,17 @@
          int j;
          if (resynth)
          {
+            unsigned cm_mask;
+            /*B can be as large as 16, so this shift might overflow an int on a
+               16-bit platform; use a long to get defined behavior.*/
+            cm_mask = (unsigned)(1UL<<B)-1;
+            fill &= cm_mask;
             if (!fill)
             {
                for (j=0;j<N;j++)
                   X[j] = 0;
             } else {
-               if (lowband == NULL || (spread==SPREAD_AGGRESSIVE && B<=1))
+               if (lowband == NULL)
                {
                   /* Noise */
                   for (j=0;j<N;j++)
@@ -1031,7 +1040,7 @@
                      *seed = lcg_rand(*seed);
                      X[j] = (celt_int32)(*seed)>>20;
                   }
-                  cm = (1<<B)-1;
+                  cm = cm_mask;
                } else {
                   /* Folded spectrum */
                   for (j=0;j<N;j++)
@@ -1050,10 +1059,7 @@
       if (stereo)
       {
          if (N!=2)
-         {
-            cm |= cm>>B;
             stereo_merge(X, Y, mid, N);
-         }
          if (inv)
          {
             int j;
@@ -1096,6 +1102,7 @@
             for (j=0;j<N0;j++)
                lowband_out[j] = MULT16_16_Q15(n,X[j]);
          }
+         cm &= (1<<B)-1;
       }
    }
    return cm;
@@ -1104,7 +1111,7 @@
 void quant_all_bands(int encode, const CELTMode *m, int start, int end,
       celt_norm *_X, celt_norm *_Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
       int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res, int resynth,
-      int total_bits, void *ec, int LM, int codedBands, ec_uint32 *seed)
+      celt_int32 total_bits, void *ec, int LM, int codedBands, ec_uint32 *seed)
 {
    int i;
    celt_int32 balance;
@@ -1155,7 +1162,7 @@
       /* Compute how many bits we want to allocate to this band */
       if (i != start)
          balance -= tell;
-      remaining_bits = ((celt_int32)total_bits<<BITRES)-tell-1- (shortBlocks&&LM>=2 ? (1<<BITRES) : 0);
+      remaining_bits = total_bits-tell-1;
       if (i <= codedBands-1)
       {
          curr_balance = balance / IMIN(3, codedBands-i);
@@ -1175,17 +1182,15 @@
             Y = norm;
       }
 
-      /* This ensures we never repeat spectral content within one band */
-      if (lowband_offset != 0)
-         effective_lowband = IMAX(M*eBands[start], M*eBands[lowband_offset]-N);
-
       /* Get a conservative estimate of the collapse_mask's for the bands we're
           going to be folding from. */
-      if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1))
+      if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
       {
          int fold_start;
          int fold_end;
          int fold_i;
+         /* This ensures we never repeat spectral content within one band */
+         effective_lowband = IMAX(M*eBands[start], M*eBands[lowband_offset]-N);
          fold_start = lowband_offset;
          while(M*eBands[--fold_start] > effective_lowband);
          fold_end = lowband_offset-1;
@@ -1224,8 +1229,8 @@
                norm+M*eBands[i], bandE, 0, seed, Q15ONE, lowband_scratch, x_cm|y_cm);
          y_cm = x_cm;
       }
-      collapse_masks[i*C+0] = (unsigned char)(x_cm&(1<<B)-1);
-      collapse_masks[i*C+C-1] = (unsigned char)(y_cm&(1<<B)-1);
+      collapse_masks[i*C+0] = (unsigned char)x_cm;
+      collapse_masks[i*C+C-1] = (unsigned char)y_cm;
       balance += pulses[i] + tell;
 
       /* Update the folding position only as long as we have 1 bit/sample depth */
--- a/libcelt/bands.h
+++ b/libcelt/bands.h
@@ -88,7 +88,7 @@
 void quant_all_bands(int encode, const CELTMode *m, int start, int end,
       celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
       int time_domain, int fold, int dual_stereo, int intensity, int *tf_res, int resynth,
-      int total_bits, void *enc, int M, int codedBands, ec_uint32 *seed);
+      celt_int32 total_bits, void *enc, int M, int codedBands, ec_uint32 *seed);
 
 
 void stereo_decision(const CELTMode *m, celt_norm * restrict X, int *stereo_mode, int len, int M);
--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@@ -1312,7 +1312,7 @@
    ALLOC(collapse_masks, st->mode->nbEBands, unsigned char);
    quant_all_bands(1, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
          bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, intensity, tf_res, resynth,
-         nbCompressedBytes*8, enc, LM, codedBands, &st->rng);
+         nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, enc, LM, codedBands, &st->rng);
 
    if (anti_collapse_rsv > 0)
    {
@@ -2150,7 +2150,7 @@
    ALLOC(collapse_masks, st->mode->nbEBands, unsigned char);
    quant_all_bands(0, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
          NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, 1,
-         len*8, dec, LM, codedBands, &st->rng);
+         len*(8<<BITRES)-anti_collapse_rsv, dec, LM, codedBands, &st->rng);
 
    if (anti_collapse_rsv > 0)
    {