shithub: opus

--- a/libcelt/bands.c

+++ b/libcelt/bands.c

@@ -212,7 +212,7 @@

 /* This prevents energy collapse for transients with multiple short MDCTs */

-void anti_collapse(const CELTMode *m, celt_norm *_X, int LM, int C, int size,

+void anti_collapse(const CELTMode *m, celt_norm *_X, unsigned char *collapse_masks, int LM, int C, int size,

       int start, int end, celt_word16 *logE, celt_word16 *prev1logE,

       celt_word16 *prev2logE, int *pulses, celt_uint32 seed)

@@ -257,11 +257,8 @@

          X = _X+c*size+(m->eBands[i]<<LM);

          for (k=0;k<1<<LM;k++)

-            celt_word32 sum=0;

             /* Detect collapse */

-            for (j=0;j<N0;j++)

-               sum += ABS16(X[(j<<LM)+k]);

-            if (sum<QCONST16(1e-4, 14))

+            if (!(collapse_masks[i*C+c]&1<<k))

                /* Fill with noise */

                for (j=0;j<N0;j++)

@@ -605,7 +602,7 @@

    the mono and stereo case. Even in the mono case, it can split the band

    in two and transmit the energy difference with the two half-bands. It

    can be called recursively so bands can end up being split in 8 parts. */

-static void quant_band(int encode, const CELTMode *m, int i, celt_norm *X, celt_norm *Y,

+static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, celt_norm *Y,

       int N, int b, int spread, int B, int intensity, int tf_change, celt_norm *lowband, int resynth, void *ec,

       celt_int32 *remaining_bits, int LM, celt_norm *lowband_out, const celt_ener *bandE, int level,

       celt_int32 *seed, celt_word16 gain, celt_norm *lowband_scratch, int fill)

@@ -623,6 +620,7 @@

    int inv = 0;

    celt_word16 mid=0, side=0;

    int longBlocks;

+   unsigned cm;

    longBlocks = B0==1;

@@ -656,7 +654,7 @@

       } while (++c<1+stereo);

       if (lowband_out)

          lowband_out[0] = SHR16(X[0],4);

-      return;

+      return 1;

    if (!stereo && level == 0)

@@ -680,6 +678,7 @@

             haar1(X, N>>k, 1<<k);

          if (lowband)

             haar1(lowband, N>>k, 1<<k);

+         fill |= fill<<(1<<k);

       B>>=recombine;

       N_B<<=recombine;

@@ -691,6 +690,7 @@

             haar1(X, N_B, B);

          if (lowband)

             haar1(lowband, N_B, B);

+         fill |= fill<<B;

          B <<= 1;

          N_B >>= 1;

          time_divide++;

@@ -718,6 +718,8 @@

          Y = X+N;

          split = 1;

          LM -= 1;

+         if (B==1)

+            fill |= fill<<1;

          B = (B+1)>>1;

@@ -853,11 +855,13 @@

          imid = 32767;

          iside = 0;

+         fill &= (1<<B)-1;

          delta = -16384;

       } else if (itheta == 16384)

          imid = 0;

          iside = 32767;

+         fill &= (1<<B)-1<<B;

          delta = 16384;

       } else {

          imid = bitexact_cos(itheta);

@@ -906,7 +910,9 @@

          sign = 1-2*sign;

-         quant_band(encode, m, i, x2, NULL, N, mbits, spread, B, intensity, tf_change, lowband, resynth, ec, remaining_bits, LM, lowband_out, NULL, level, seed, gain, lowband_scratch, fill);

+         cm = quant_band(encode, m, i, x2, NULL, N, mbits, spread, B, intensity, tf_change, lowband, resynth, ec, remaining_bits, LM, lowband_out, NULL, level, seed, gain, lowband_scratch, fill);

+         /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),

+             and there's no need to worry about mixing with the other channel. */

          y2[0] = -sign*x2[1];

          y2[1] = sign*x2[0];

          if (resynth)

@@ -955,12 +961,14 @@

          /* In stereo mode, we do not apply a scaling to the mid because we need the normalized

             mid for folding later */

-         quant_band(encode, m, i, X, NULL, N, mbits, spread, B, intensity, tf_change,

+         cm = quant_band(encode, m, i, X, NULL, N, mbits, spread, B, intensity, tf_change,

                lowband, resynth, ec, remaining_bits, LM, next_lowband_out1,

                NULL, next_level, seed, stereo ? Q15ONE : MULT16_16_P15(gain,mid), lowband_scratch, fill);

-         quant_band(encode, m, i, Y, NULL, N, sbits, spread, B, intensity, tf_change,

+         /* For a stereo split, the high bits of fill are always zero, so no

+             folding will be done to the side. */

+         cm |= quant_band(encode, m, i, Y, NULL, N, sbits, spread, B, intensity, tf_change,

                next_lowband2, resynth, ec, remaining_bits, LM, NULL,

-               NULL, next_level, seed, MULT16_16_P15(gain,side), NULL, fill && !stereo);

+               NULL, next_level, seed, MULT16_16_P15(gain,side), NULL, fill>>B)<<B;

    } else {

@@ -984,9 +992,9 @@

          /* Finally do the actual quantization */

          if (encode)

-            alg_quant(X, N, K, spread, B, lowband, resynth, (ec_enc*)ec, seed, gain);

+            cm = alg_quant(X, N, K, spread, B, lowband, resynth, (ec_enc*)ec, seed, gain);

          else

-            alg_unquant(X, N, K, spread, B, lowband, (ec_dec*)ec, seed, gain);

+            cm = alg_unquant(X, N, K, spread, B, lowband, (ec_dec*)ec, seed, gain);

       } else {

          /* If there's no pulse, fill the band anyway */

          int j;

@@ -996,6 +1004,7 @@

                for (j=0;j<N;j++)

                   X[j] = 0;

+               cm = 0;

             } else {

                if (lowband == NULL || (spread==SPREAD_AGGRESSIVE && B<=1))

@@ -1005,10 +1014,12 @@

                      *seed = lcg_rand(*seed);

                      X[j] = (celt_int32)(*seed)>>20;

+                  cm = (1<<B)-1;

                } else {

                   /* Folded spectrum */

                   for (j=0;j<N;j++)

                      X[j] = lowband[j];

+                  cm = fill;

                renormalise_vector(X, N, gain);

@@ -1022,7 +1033,10 @@

       if (stereo)

          if (N!=2)

+         {

+            cm |= cm>>B;

             stereo_merge(X, Y, mid, N);

+         }

          if (inv)

             int j;

@@ -1044,11 +1058,15 @@

             B >>= 1;

             N_B <<= 1;

+            cm |= cm>>B;

             haar1(X, N_B, B);

          for (k=0;k<recombine;k++)

+         {

+            cm |= cm<<(1<<k);

             haar1(X, N0>>k, 1<<k);

+         }

          B<<=recombine;

          N_B>>=recombine;

@@ -1063,10 +1081,11 @@

+   return cm;

 void quant_all_bands(int encode, const CELTMode *m, int start, int end,

-      celt_norm *_X, celt_norm *_Y, const celt_ener *bandE, int *pulses,

+      celt_norm *_X, celt_norm *_Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,

       int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res, int resynth,

       int total_bits, void *ec, int LM, int codedBands)

@@ -1097,7 +1116,7 @@

    else

       seed = ((ec_dec*)ec)->rng;

    balance = 0;

-   lowband_offset = -1;

+   lowband_offset = 0;

    for (i=start;i<end;i++)

       celt_int32 tell;

@@ -1107,7 +1126,9 @@

       int effective_lowband=-1;

       celt_norm * restrict X, * restrict Y;

       int tf_change=0;

+      unsigned x_cm;

+      unsigned y_cm;

       X = _X+M*eBands[i];

       if (_Y!=NULL)

          Y = _Y+M*eBands[i];

@@ -1131,8 +1152,8 @@

          b = 0;

-      if (M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==-1))

-            lowband_offset = M*eBands[i];

+      if (M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))

+            lowband_offset = i;

       tf_change = tf_res[i];

       if (i>=m->effEBands)

@@ -1143,9 +1164,31 @@

       /* This ensures we never repeat spectral content within one band */

-      if (lowband_offset != -1)

-         effective_lowband = IMAX(M*eBands[start], lowband_offset-N);

+      if (lowband_offset != 0)

+         effective_lowband = IMAX(M*eBands[start], M*eBands[lowband_offset]-N);

+      /* Get a conservative estimate of the collapse_mask's for the bands we're

+          going to be folding from. */

+      if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1))

+      {

+         int fold_start;

+         int fold_end;

+         int fold_i;

+         fold_start = lowband_offset;

+         while(M*eBands[--fold_start] > effective_lowband);

+         fold_end = lowband_offset-1;

+         while(M*eBands[++fold_end] < effective_lowband+N);

+         x_cm = y_cm = 0;

+         fold_i = fold_start; do {

+           x_cm |= collapse_masks[fold_i*C+0];

+           y_cm |= collapse_masks[fold_i*C+1];

+         } while (++fold_i<fold_end);

+      }

+      /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost

+          always) be non-zero.*/

+      else

+         x_cm = y_cm = (1<<B)-1;

       if (dual_stereo && i==intensity)

          int j;

@@ -1157,16 +1200,19 @@

       if (dual_stereo)

-         quant_band(encode, m, i, X, NULL, N, b/2, spread, B, intensity, tf_change,

+         x_cm = quant_band(encode, m, i, X, NULL, N, b/2, spread, B, intensity, tf_change,

                effective_lowband != -1 ? norm+effective_lowband : NULL, resynth, ec, &remaining_bits, LM,

-               norm+M*eBands[i], bandE, 0, &seed, Q15ONE, lowband_scratch, 1);

-         quant_band(encode, m, i, Y, NULL, N, b/2, spread, B, intensity, tf_change,

+               norm+M*eBands[i], bandE, 0, &seed, Q15ONE, lowband_scratch, x_cm);

+         y_cm = quant_band(encode, m, i, Y, NULL, N, b/2, spread, B, intensity, tf_change,

                effective_lowband != -1 ? norm2+effective_lowband : NULL, resynth, ec, &remaining_bits, LM,

-               norm2+M*eBands[i], bandE, 0, &seed, Q15ONE, lowband_scratch, 1);

+               norm2+M*eBands[i], bandE, 0, &seed, Q15ONE, lowband_scratch, y_cm);

+         collapse_masks[i*2+0] = (unsigned char)(x_cm&(1<<B)-1);

+         collapse_masks[i*2+1] = (unsigned char)(y_cm&(1<<B)-1);

       } else {

-         quant_band(encode, m, i, X, Y, N, b, spread, B, intensity, tf_change,

+         x_cm = quant_band(encode, m, i, X, Y, N, b, spread, B, intensity, tf_change,

                effective_lowband != -1 ? norm+effective_lowband : NULL, resynth, ec, &remaining_bits, LM,

-               norm+M*eBands[i], bandE, 0, &seed, Q15ONE, lowband_scratch, 1);

+               norm+M*eBands[i], bandE, 0, &seed, Q15ONE, lowband_scratch, x_cm|y_cm);

+         collapse_masks[i*C+1] = collapse_masks[i*C+0] = (unsigned char)(x_cm&(1<<B)-1);

       balance += pulses[i] + tell;

--- a/libcelt/bands.h

+++ b/libcelt/bands.h

@@ -86,7 +86,7 @@

  * @param enc Entropy encoder

*/

 void quant_all_bands(int encode, const CELTMode *m, int start, int end,

-      celt_norm * X, celt_norm * Y, const celt_ener *bandE, int *pulses,

+      celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,

       int time_domain, int fold, int dual_stereo, int intensity, int *tf_res, int resynth,

       int total_bits, void *enc, int M, int codedBands);

@@ -93,7 +93,7 @@

 void stereo_decision(const CELTMode *m, celt_norm * restrict X, int *stereo_mode, int len, int M);

-void anti_collapse(const CELTMode *m, celt_norm *_X, int LM, int C, int size,

+void anti_collapse(const CELTMode *m, celt_norm *_X, unsigned char *collapse_masks, int LM, int C, int size,

       int start, int end, celt_word16 *logE, celt_word16 *prev1logE,

       celt_word16 *prev2logE, int *pulses, celt_uint32 seed);

--- a/libcelt/celt.c

+++ b/libcelt/celt.c

@@ -792,6 +792,7 @@

    VARDECL(int, offsets);

    VARDECL(int, fine_priority);

    VARDECL(int, tf_res);

+   VARDECL(unsigned char, collapse_masks);

    celt_sig *_overlap_mem;

    celt_sig *prefilter_mem;

    celt_word16 *oldBandE, *oldLogE2;

@@ -819,6 +820,7 @@

    celt_int32 tell;

    int prefilter_tapset=0;

    int pf_on;

+   int anti_collapse_rsv;

    int anti_collapse_on=0;

    SAVE_STACK;

@@ -1265,8 +1267,10 @@

    ALLOC(pulses, st->mode->nbEBands, int);

    ALLOC(fine_priority, st->mode->nbEBands, int);

-   /* bits =   packet size        -       where we are         - safety -  anti-collapse*/

-   bits = (nbCompressedBytes*8<<BITRES) - ec_enc_tell(enc, BITRES) - 1 - (isTransient&&LM>=2 ? (1<<BITRES) : 0);

+   /* bits =   packet size        -       where we are         - safety*/

+   bits = (nbCompressedBytes*8<<BITRES) - ec_enc_tell(enc, BITRES) - 1;

+   anti_collapse_rsv = isTransient&&LM>=2&&bits>=(LM+2<<BITRES) ? (1<<BITRES) : 0;

+   bits -= anti_collapse_rsv;

    codedBands = compute_allocation(st->mode, st->start, st->end, offsets,

          alloc_trim, &intensity, &dual_stereo, bits, pulses, fine_quant,

          fine_priority, C, LM, enc, 1, st->lastCodedBands);

@@ -1286,11 +1290,12 @@

 #endif

    /* Residual quantisation */

-   quant_all_bands(1, st->mode, st->start, st->end, X, C==2 ? X+N : NULL,

+   ALLOC(collapse_masks, st->mode->nbEBands, unsigned char);

+   quant_all_bands(1, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,

          bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, intensity, tf_res, resynth,

          nbCompressedBytes*8, enc, LM, codedBands);

-   if (isTransient && LM>=2)

+   if (anti_collapse_rsv > 0)

       anti_collapse_on = st->consec_transient<2;

       ec_enc_bits(enc, anti_collapse_on, 1);

@@ -1311,7 +1316,7 @@

 #endif

       if (anti_collapse_on)

-         anti_collapse(st->mode, X, LM, C, N,

+         anti_collapse(st->mode, X, collapse_masks, LM, C, N,

                st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, enc->rng);

@@ -1882,6 +1887,7 @@

    VARDECL(int, offsets);

    VARDECL(int, fine_priority);

    VARDECL(int, tf_res);

+   VARDECL(unsigned char, collapse_masks);

    celt_sig *out_mem[2];

    celt_sig *decode_mem[2];

    celt_sig *overlap_mem[2];

@@ -1905,6 +1911,7 @@

    celt_int32 tell;

    int dynalloc_logp;

    int postfilter_tapset;

+   int anti_collapse_rsv;

    int anti_collapse_on=0;

    SAVE_STACK;

@@ -2060,7 +2067,9 @@

    alloc_trim = tell+(6<<BITRES) <= total_bits ?

          ec_dec_icdf(dec, trim_icdf, 7) : 5;

-   bits = (len*8<<BITRES) - ec_dec_tell(dec, BITRES) - 1 - (isTransient&&LM>=2 ? (1<<BITRES) : 0);

+   bits = (len*8<<BITRES) - ec_dec_tell(dec, BITRES) - 1;

+   anti_collapse_rsv = isTransient&&LM>=2&&bits>=(LM+2<<BITRES) ? (1<<BITRES) : 0;

+   bits -= anti_collapse_rsv;

    codedBands = compute_allocation(st->mode, st->start, st->end, offsets,

          alloc_trim, &intensity, &dual_stereo, bits, pulses, fine_quant,

          fine_priority, C, LM, dec, 0, 0);

@@ -2068,11 +2077,12 @@

    unquant_fine_energy(st->mode, st->start, st->end, bandE, oldBandE, fine_quant, dec, C);

    /* Decode fixed codebook */

-   quant_all_bands(0, st->mode, st->start, st->end, X, C==2 ? X+N : NULL,

+   ALLOC(collapse_masks, st->mode->nbEBands, unsigned char);

+   quant_all_bands(0, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,

          NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, 1,

          len*8, dec, LM, codedBands);

-   if (isTransient && LM>=2)

+   if (anti_collapse_rsv > 0)

       anti_collapse_on = ec_dec_bits(dec, 1);

@@ -2081,7 +2091,7 @@

          fine_quant, fine_priority, len*8-ec_dec_tell(dec, 0), dec, C);

    if (anti_collapse_on)

-      anti_collapse(st->mode, X, LM, C, N,

+      anti_collapse(st->mode, X, collapse_masks, LM, C, N,

             st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, dec->rng);

    log2Amp(st->mode, st->start, st->end, bandE, oldBandE, C);

--- a/libcelt/vq.c

+++ b/libcelt/vq.c

@@ -106,6 +106,8 @@

       while ((stride2*stride2+stride2)*stride + (stride>>2) < len)

          stride2++;

+   /*TODO: We should be passing around log2(B), not B, for both this and for

+      extract_collapse_mask().*/

    len /= stride;

    for (i=0;i<stride;i++)

@@ -153,7 +155,27 @@

    while (++i < N);

-void alg_quant(celt_norm *X, int N, int K, int spread, int B, celt_norm *lowband,

/** Build a bitmask of which of the B blocks of a pulse vector contain pulses.
 *
 * The entries of iy are treated as B consecutive blocks of N/B entries each
 * (integer division, so any entries past B*(N/B) are not examined). Bit i of
 * the result is set when block i holds at least one non-zero entry.
 *
 * @param iy Pulse vector to scan; it is only read
 * @param N  Number of entries in iy
 * @param B  Number of blocks; when B<=1 nothing is scanned
 * @return 1 when B<=1, otherwise the per-block mask described above
 *
 * NOTE(review): the scan assumes N>=B and that B does not exceed the number of
 * bits in an unsigned -- confirm against the callers.
 */
static unsigned extract_collapse_mask(const int *iy, int N, int B)
{
   unsigned collapse_mask;
   int N0;
   int i;
   if (B<=1)
      return 1;
   /*TODO: We should be passing around log2(B), not B, for both this and for
      exp_rotation().*/
   N0 = N/B;
   collapse_mask = 0;
   i=0; do {
      /* OR the whole block together first so that the zero test and the shift
         happen once per block rather than once per entry. */
      unsigned block_bits = 0;
      int j;
      j=0; do {
         block_bits |= (unsigned)iy[i*N0+j];
      } while (++j<N0);
      /* Shift an unsigned 1 so that setting the top bit stays well defined. */
      if (block_bits != 0)
         collapse_mask |= 1U<<i;
   } while (++i<B);
   return collapse_mask;
}

+unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, celt_norm *lowband,

       int resynth, ec_enc *enc, celt_int32 *seed, celt_word16 gain)

    VARDECL(celt_norm, y);

@@ -165,6 +187,7 @@

    celt_word32 sum;

    celt_word32 xy;

    celt_word16 yy;

+   unsigned collapse_mask;

    SAVE_STACK;

    celt_assert2(K!=0, "alg_quant() needs at least one pulse");

@@ -308,17 +331,20 @@

       normalise_residual(iy, X, N, K, yy, gain);

       exp_rotation(X, N, -1, B, K, spread);

+   collapse_mask = extract_collapse_mask(iy, N, B);

    RESTORE_STACK;

+   return collapse_mask;

 /** Decode pulse vector and combine the result with the pitch vector to produce

     the final normalised signal in the current band. */

-void alg_unquant(celt_norm *X, int N, int K, int spread, int B,

+unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,

       celt_norm *lowband, ec_dec *dec, celt_int32 *seed, celt_word16 gain)

    int i;

    celt_word32 Ryy;

+   unsigned collapse_mask;

    VARDECL(int, iy);

    SAVE_STACK;

@@ -332,7 +358,9 @@

    } while (++i < N);

    normalise_residual(iy, X, N, K, Ryy, gain);

    exp_rotation(X, N, -1, B, K, spread);

+   collapse_mask = extract_collapse_mask(iy, N, B);

    RESTORE_STACK;

+   return collapse_mask;

 void renormalise_vector(celt_norm *X, int N, celt_word16 gain)

--- a/libcelt/vq.h

+++ b/libcelt/vq.h

@@ -50,8 +50,9 @@

  * @param K Number of pulses to use

  * @param p Pitch vector (it is assumed that p+x is a unit vector)

  * @param enc Entropy encoder state

+ * @return A mask indicating which blocks in the band received pulses

*/

-void alg_quant(celt_norm *X, int N, int K, int spread, int B, celt_norm *lowband,

+unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, celt_norm *lowband,

       int resynth, ec_enc *enc, celt_int32 *seed, celt_word16 gain);

 /** Algebraic pulse decoder

@@ -60,8 +61,9 @@

  * @param K Number of pulses to use

  * @param p Pitch vector (automatically added to x)

  * @param dec Entropy decoder state

+ * @return A mask indicating which blocks in the band received pulses

*/

-void alg_unquant(celt_norm *X, int N, int K, int spread, int B,

+unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,

       celt_norm *lowband, ec_dec *dec, celt_int32 *seed, celt_word16 gain);

 void renormalise_vector(celt_norm *X, int N, celt_word16 gain);