shithub: opus

--- a/celt/arch.h

+++ b/celt/arch.h

@@ -108,6 +108,10 @@

 #define ABS16(x) ((x) < 0 ? (-(x)) : (x))

 #define ABS32(x) ((x) < 0 ? (-(x)) : (x))

+/* Saturate a 32-bit value to the signed 16-bit range [-32768, 32767]. */
+static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
+   if (x > 32767)
+      return 32767;
+   if (x < -32768)
+      return -32768;
+   /* x already fits in 16 bits here, so the narrowing cast keeps its value. */
+   return (opus_int16)x;
+}

 #ifdef FIXED_DEBUG

 #include "fixed_debug.h"

 #else

--- a/celt/celt.h

+++ b/celt/celt.h

@@ -134,7 +134,8 @@

 int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);

-int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec);

+int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data,

+      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);

 #define celt_encoder_ctl opus_custom_encoder_ctl

 #define celt_decoder_ctl opus_custom_decoder_ctl

--- a/celt/celt_decoder.c

+++ b/celt/celt_decoder.c

@@ -191,7 +191,7 @@

 static

 #endif

 void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef,

-      celt_sig *mem)

+      celt_sig *mem, int accum)

    int c;

    int Nd;

@@ -199,7 +199,10 @@

    opus_val16 coef0;

    VARDECL(celt_sig, scratch);

    SAVE_STACK;

+#ifndef FIXED_POINT

+   (void)accum;

+   celt_assert(accum==0);

+#endif

    ALLOC(scratch, N, celt_sig);

    coef0 = coef[0];

    Nd = N/downsample;

@@ -238,11 +241,24 @@

          apply_downsampling=1;

       } else {

          /* Shortcut for the standard (non-custom modes) case */

-         for (j=0;j<N;j++)

+#ifdef FIXED_POINT

+         if (accum)

-            celt_sig tmp = x[j] + m + VERY_SMALL;

-            m = MULT16_32_Q15(coef0, tmp);

-            y[j*C] = SCALEOUT(SIG2WORD16(tmp));

+            for (j=0;j<N;j++)

+            {

+               celt_sig tmp = x[j] + m + VERY_SMALL;

+               m = MULT16_32_Q15(coef0, tmp);

+               y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp))));

+            }

+         } else

+#endif

+         {

+            for (j=0;j<N;j++)

+            {

+               celt_sig tmp = x[j] + m + VERY_SMALL;

+               m = MULT16_32_Q15(coef0, tmp);

+               y[j*C] = SCALEOUT(SIG2WORD16(tmp));

+            }

       mem[c] = m;

@@ -250,8 +266,17 @@

       if (apply_downsampling)

          /* Perform down-sampling */

-         for (j=0;j<Nd;j++)

-            y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));

+#ifdef FIXED_POINT

+         if (accum)

+         {

+            for (j=0;j<Nd;j++)

+               y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample]))));

+         } else

+#endif

+         {

+            for (j=0;j<Nd;j++)

+               y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));

+         }

    } while (++c<C);

    RESTORE_STACK;

@@ -378,7 +403,8 @@

    pitch of 480 Hz. */

 #define PLC_PITCH_LAG_MIN (100)

-static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM)

+static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm,

+      int N, int LM, int accum)

    int c;

    int i;

@@ -680,8 +706,7 @@

       } while (++c<C);

-   deemphasis(out_syn, pcm, N, C, downsample,

-         mode->preemph, st->preemph_memD);

+   deemphasis(out_syn, pcm, N, C, downsample, mode->preemph, st->preemph_memD, accum);

    st->loss_count = loss_count+1;

@@ -688,7 +713,8 @@

    RESTORE_STACK;

-int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec)

+int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,

+      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)

    int c, i, N;

    int spread_decision;

@@ -803,7 +829,7 @@

    if (data == NULL || len<=1)

-      celt_decode_lost(st, pcm, N, LM);

+      celt_decode_lost(st, pcm, N, LM, accum);

       RESTORE_STACK;

       return frame_size/st->downsample;

@@ -1030,7 +1056,7 @@

    st->rng = dec->rng;

    /* We reuse freq[] as scratch space for the de-emphasis */

-   deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD);

+   deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);

    st->loss_count = 0;

    RESTORE_STACK;

    if (ec_tell(dec) > 8*len)

@@ -1046,7 +1072,7 @@

 #ifdef FIXED_POINT

 int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)

-   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);

+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);

 #ifndef DISABLE_FLOAT_API

@@ -1063,7 +1089,7 @@

    N = frame_size;

    ALLOC(out, C*N, opus_int16);

-   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);

+   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);

    if (ret>0)

       for (j=0;j<C*ret;j++)

          pcm[j]=out[j]*(1.f/32768.f);

@@ -1077,7 +1103,7 @@

 int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)

-   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);

+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);

 int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)

@@ -1093,7 +1119,7 @@

    N = frame_size;

    ALLOC(out, C*N, celt_sig);

-   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);

+   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);

    if (ret>0)

       for (j=0;j<C*ret;j++)

--- a/src/opus_decoder.c

+++ b/src/opus_decoder.c

@@ -77,13 +77,7 @@

    opus_uint32  rangeFinal;

};

-#ifdef FIXED_POINT

-static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {

-   return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;

-}

-#endif

 int opus_decoder_get_size(int channels)

    int silkDecSizeBytes, celtDecSizeBytes;

@@ -215,7 +209,7 @@

    VARDECL(opus_val16, pcm_transition_silk);

    int pcm_transition_celt_size;

    VARDECL(opus_val16, pcm_transition_celt);

-   opus_val16 *pcm_transition;

+   opus_val16 *pcm_transition=NULL;

    int redundant_audio_size;

    VARDECL(opus_val16, redundant_audio);

@@ -230,6 +224,7 @@

    int F2_5, F5, F10, F20;

    const opus_val16 *window;

    opus_uint32 redundant_rng = 0;

+   int celt_accum;

    ALLOC_STACK;

    silk_dec = (char*)st+st->silk_dec_offset;

@@ -295,6 +290,14 @@

+   /* In fixed-point, we can tell CELT to do the accumulation on top of the

+      SILK PCM buffer. This saves some stack space. */

+#ifdef FIXED_POINT

+   celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10);

+#else

+   celt_accum = 0;

+#endif

    pcm_transition_silk_size = ALLOC_NONE;

    pcm_transition_celt_size = ALLOC_NONE;

    if (data!=NULL && st->prev_mode > 0 && (

@@ -325,7 +328,7 @@

    /* Don't allocate any memory when in CELT-only mode */

-   pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE;

+   pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE;

    ALLOC(pcm_silk, pcm_silk_size, opus_int16);

    /* SILK processing */

@@ -332,7 +335,13 @@

    if (mode != MODE_CELT_ONLY)

       int lost_flag, decoded_samples;

-      opus_int16 *pcm_ptr = pcm_silk;

+      opus_int16 *pcm_ptr;

+#ifdef FIXED_POINT

+      if (celt_accum)

+         pcm_ptr = pcm;

+      else

+#endif

+         pcm_ptr = pcm_silk;

       if (st->prev_mode==MODE_CELT_ONLY)

          silk_InitDecoder( silk_dec );

@@ -462,7 +471,7 @@

       celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));

       celt_decode_with_ec(celt_dec, data+len, redundancy_bytes,

-                          redundant_audio, F5, NULL);

+                          redundant_audio, F5, NULL, 0);

       celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng));

@@ -477,25 +486,28 @@

          celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);

       /* Decode CELT */

       celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data,

-                                     len, pcm, celt_frame_size, &dec);

+                                     len, pcm, celt_frame_size, &dec, celt_accum);

    } else {

       unsigned char silence[2] = {0xFF, 0xFF};

-      for (i=0;i<frame_size*st->channels;i++)

-         pcm[i] = 0;

+      if (!celt_accum)

+      {

+         for (i=0;i<frame_size*st->channels;i++)

+            pcm[i] = 0;

+      }

       /* For hybrid -> SILK transitions, we let the CELT MDCT

          do a fade-out by decoding a silence frame */

       if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) )

          celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));

-         celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL);

+         celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL, celt_accum);

-   if (mode != MODE_CELT_ONLY)

+   if (mode != MODE_CELT_ONLY && !celt_accum)

 #ifdef FIXED_POINT

       for (i=0;i<frame_size*st->channels;i++)

-         pcm[i] = SAT16(pcm[i] + pcm_silk[i]);

+         pcm[i] = SAT16(ADD32(pcm[i], pcm_silk[i]));

 #else

       for (i=0;i<frame_size*st->channels;i++)

          pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]);

@@ -514,7 +526,7 @@

       celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);

       celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));

-      celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL);

+      celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0);

       celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng));

       smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5,

                   pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs);