shithub: opus

--- a/Makefile.am

+++ b/Makefile.am

@@ -20,8 +20,9 @@

 SILK_SOURCES += $(SILK_SOURCES_FIXED)

 else

 SILK_SOURCES += $(SILK_SOURCES_FLOAT)

-OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)

 endif

+OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)

 if CPU_ARM

 CELT_SOURCES += $(CELT_SOURCES_ARM)

--- a/celt/arch.h

+++ b/celt/arch.h

@@ -185,6 +185,7 @@

 #define MAC16_32_Q15(c,a,b)     ((c)+(a)*(b))

 #define MULT16_16_Q11_32(a,b)     ((a)*(b))

+#define MULT16_16_Q11(a,b)     ((a)*(b)) /* plain product, no shift; same expansion as the neighbouring Q13/Q14/Q15 forms */

 #define MULT16_16_Q13(a,b)     ((a)*(b))

 #define MULT16_16_Q14(a,b)     ((a)*(b))

 #define MULT16_16_Q15(a,b)     ((a)*(b))

--- a/celt/celt.h

+++ b/celt/celt.h

@@ -52,11 +52,11 @@

 typedef struct {

    int valid;

-   opus_val16 tonality;

-   opus_val16 tonality_slope;

-   opus_val16 noisiness;

-   opus_val16 activity;

-   opus_val16 music_prob;

+   float tonality;

+   float tonality_slope;

+   float noisiness;

+   float activity;

+   float music_prob;

    int        bandwidth;

 }AnalysisInfo;

@@ -109,10 +109,7 @@

 #define OPUS_SET_LFE_REQUEST    10024

 #define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x)

-#define OPUS_SET_ENERGY_SAVE_REQUEST    10026

-#define OPUS_SET_ENERGY_SAVE(x) OPUS_SET_ENERGY_SAVE_REQUEST, __opus_check_val16_ptr(x)

-#define OPUS_SET_ENERGY_MASK_REQUEST    10028

+#define OPUS_SET_ENERGY_MASK_REQUEST    10026

 #define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)

 /* Encoder stuff */

@@ -192,6 +189,9 @@

 extern const signed char tf_select_table[4][8];

 int resampling_factor(opus_int32 rate);

+void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,

+                        int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip);

 void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,

       opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,

--- a/celt/celt_encoder.c

+++ b/celt/celt_encoder.c

@@ -111,7 +111,6 @@

    opus_val32 overlap_max;

    opus_val16 stereo_saving;

    int intensity;

-   opus_val16 *energy_save;

    opus_val16 *energy_mask;

    opus_val16 spec_avg;

@@ -452,7 +451,7 @@

-static void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,

+void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,

                         int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip)

    int i;

@@ -744,7 +743,7 @@

 static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,

       const opus_val16 *bandLogE, int end, int LM, int C, int N0,

       AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,

-      int intensity)

+      int intensity, opus_val16 surround_trim)

    int i;

    opus_val32 diff=0;

@@ -818,11 +817,12 @@

    if (diff < -QCONST16(10.f, DB_SHIFT))

       trim_index++;

    trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));

+   trim -= SHR16(surround_trim, DB_SHIFT-8);

    trim -= 2*SHR16(tf_estimate, 14-8);

-#ifndef FIXED_POINT

+#ifndef DISABLE_FLOAT_API

    if (analysis->valid)

-      trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05f)));

+      trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), QCONST16(2.f, 8)*(analysis->tonality_slope+.05f)));

 #endif

@@ -877,7 +877,7 @@

 static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,

       int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,

       int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,

-      int effectiveBytes, opus_int32 *tot_boost_, int lfe)

+      int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc)

    int i, c;

    opus_int32 tot_boost=0;

@@ -940,6 +940,8 @@

             follower[i] = MAX16(0, bandLogE[i]-follower[i]);

+      for (i=start;i<end;i++)

+         follower[i] = MAX16(follower[i], surround_dynalloc[i]);

       /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */

       if ((!vbr || constrained_vbr)&&!isTransient)

@@ -1140,7 +1142,7 @@

    target = base_target;

    /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/

-#ifndef FIXED_POINT

+#ifndef DISABLE_FLOAT_API

    if (analysis->valid && analysis->activity<.4)

       target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity));

 #endif

@@ -1165,7 +1167,7 @@

                     QCONST16(0.02f,14) : QCONST16(0.04f,14);

    target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1);

-#ifndef FIXED_POINT

+#ifndef DISABLE_FLOAT_API

    /* Apply tonality boost */

    if (analysis->valid && !lfe)

@@ -1291,6 +1293,8 @@

    int transient_got_disabled=0;

    opus_val16 surround_masking=0;

    opus_val16 temporal_vbr=0;

+   opus_val16 surround_trim = 0;

+   VARDECL(opus_val16, surround_dynalloc);

    ALLOC_STACK;

    mode = st->mode;

@@ -1526,37 +1530,83 @@

    amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);

-   if (st->energy_save)

-   {

-      opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;

-#ifdef FIXED_POINT

-      /* Compensate for the 1/8 gain we apply in the fixed-point downshift to avoid overflows. */

-      offset -= QCONST16(3.0f, DB_SHIFT);

-#endif

-      for(i=0;i<C*nbEBands;i++)

-         st->energy_save[i]=bandLogE[i]-offset;

-      st->energy_save=NULL;

-   }

+   ALLOC(surround_dynalloc, C*nbEBands, opus_val16);

+   for(i=0;i<st->end;i++)

+      surround_dynalloc[i] = 0;

    /* This computes how much masking takes place between surround channels */

-   if (st->energy_mask&&!st->lfe)

+   if (st->start==0&&st->energy_mask&&!st->lfe)

+      int mask_end;

+      int midband;

+      int count_dynalloc;

       opus_val32 mask_avg=0;

-      opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;

+      opus_val32 diff=0;

+      int count=0;

+      mask_end = st->lastCodedBands;

       for (c=0;c<C;c++)

-         opus_val16 followE, followMask;

-         followE = followMask = -QCONST16(14.f, DB_SHIFT);

-         for(i=0;i<st->end;i++)

+         for(i=0;i<mask_end;i++)

-            /* We use a simple follower to approximate the masking spreading function. */

-            followE = MAX16(followE-QCONST16(1.f, DB_SHIFT), bandLogE[nbEBands*c+i]-offset);

-            followMask = MAX16(followMask-QCONST16(1.f, DB_SHIFT), st->energy_mask[nbEBands*c+i]);

-            mask_avg += followE-followMask;

+            opus_val16 mask;

+            mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i],

+                   QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));

+            if (mask > 0)

+               mask = HALF16(mask);

+            mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]);

+            count += eBands[i+1]-eBands[i];

+            diff += MULT16_16(mask, 1+2*i-mask_end);

-      surround_masking = DIV32_16(mask_avg,C*st->end) + QCONST16(.7f, DB_SHIFT);

-      surround_masking = MIN16(MAX16(surround_masking, -QCONST16(2.f, DB_SHIFT)), QCONST16(.2f, DB_SHIFT));

-      surround_masking -= HALF16(HALF16(surround_masking));

+      mask_avg = DIV32_16(mask_avg,count);

+      mask_avg += QCONST16(.2f, DB_SHIFT);

+      diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end);

+      /* Again, being conservative */

+      diff = HALF32(diff);

+      diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT));

+      /* Find the band that's in the middle of the coded spectrum */

+      for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++);

+      count_dynalloc=0;

+      for(i=0;i<mask_end;i++)

+      {

+         opus_val32 lin;

+         opus_val16 unmask;

+         lin = mask_avg + diff*(i-midband);

+         if (C==2)

+            unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]);

+         else

+            unmask = st->energy_mask[i];

+         unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT));

+         unmask -= lin;

+         if (unmask > QCONST16(.25f, DB_SHIFT))

+         {

+            surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT);

+            count_dynalloc++;

+         }

+      }

+      if (count_dynalloc>=3)

+      {

+         /* If we need dynalloc in many bands, it's probably because our

+            initial masking rate was too low. */

+         mask_avg += QCONST16(.25f, DB_SHIFT);

+         if (mask_avg>0)

+         {

+            /* Something went really wrong in the original calculations,

+               disabling masking. */

+            mask_avg = 0;

+            diff = 0;

+            for(i=0;i<mask_end;i++)

+               surround_dynalloc[i] = 0;

+         } else {

+            for(i=0;i<mask_end;i++)

+               surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT));

+         }

+      }

+      mask_avg += QCONST16(.2f, DB_SHIFT);

+      /* Convert to 1/64th units used for the trim */

+      surround_trim = 64*diff;

+      /*printf("%d %d ", mask_avg, surround_trim);*/

+      surround_masking = mask_avg;

    /* Temporal VBR (but not for LFE) */

    if (!st->lfe)

@@ -1683,7 +1733,7 @@

    maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets,

          st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,

-         eBands, LM, effectiveBytes, &tot_boost, st->lfe);

+         eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc);

    /* For LFE, everything interesting is in the first band */

    if (st->lfe)

       offsets[0] = IMIN(8, effectiveBytes/3);

@@ -1756,7 +1806,7 @@

          alloc_trim = 5;

       else

          alloc_trim = alloc_trim_analysis(mode, X, bandLogE,

-            st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity);

+            st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim);

       ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);

       tell = ec_tell_frac(enc);

@@ -1859,7 +1909,7 @@

    anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;

    bits -= anti_collapse_rsv;

    signalBandwidth = st->end-1;

-#ifndef FIXED_POINT

+#ifndef DISABLE_FLOAT_API

    if (st->analysis.valid)

       int min_bandwidth;

@@ -2259,12 +2309,6 @@

           opus_int32 value = va_arg(ap, opus_int32);

           st->lfe = value;

-      }

-      break;

-      case OPUS_SET_ENERGY_SAVE_REQUEST:

-      {

-          opus_val16 *value = va_arg(ap, opus_val16*);

-          st->energy_save=value;

       break;

       case OPUS_SET_ENERGY_MASK_REQUEST:

--- a/celt/fixed_generic.h

+++ b/celt/fixed_generic.h

@@ -116,6 +116,7 @@

 #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))

 #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))

+#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11)) /* 16x16 multiply, then shift right by 11; same expansion as MULT16_16_Q11_32 */

 #define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))

 #define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))

 #define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))

--- a/src/analysis.c

+++ b/src/analysis.c

@@ -184,12 +184,12 @@

    for (;i<DETECT_SIZE;i++)

       psum += tonal->pspeech[i];

    psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;

-   /*printf("%f %f\n", psum, info_out->music_prob);*/

+   /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/

    info_out->music_prob = psum;

-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix)

+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)

     int i, b;

     const kiss_fft_state *kfft;

@@ -234,7 +234,7 @@

     kfft = celt_mode->mdct.kfft[0];

     if (tonal->count==0)

        tonal->mem_fill = 240;

-    downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C);

+    downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);

     if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)

        tonal->mem_fill += len;

@@ -253,14 +253,14 @@

     for (i=0;i<N2;i++)

        float w = analysis_window[i];

-       in[i].r = MULT16_16(w, tonal->inmem[i]);

-       in[i].i = MULT16_16(w, tonal->inmem[N2+i]);

-       in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);

-       in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);

+       in[i].r = w*tonal->inmem[i];

+       in[i].i = w*tonal->inmem[N2+i];

+       in[N-i-1].r = w*tonal->inmem[N-i-1];

+       in[N-i-1].i = w*tonal->inmem[N+N2-i-1];

     OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);

     remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);

-    downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C);

+    downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);

     tonal->mem_fill = 240 + remaining;

     opus_fft(kfft, in, out);

@@ -325,8 +325,12 @@

        float stationarity;

        for (i=tbands[b];i<tbands[b+1];i++)

-          float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r

-                     + out[i].i*out[i].i + out[N-i].i*out[N-i].i;

+          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r

+                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;

+#ifdef FIXED_POINT

+          /* FIXME: It's probably best to change the BFCC filter initial state instead */

+          binE *= 5.55e-17f;

+#endif

           E += binE;

           tE += binE*tonality[i];

           nE += binE*2.f*(.5f-noisiness[i]);

@@ -334,7 +338,7 @@

        tonal->E[tonal->E_count][b] = E;

        frame_noisiness += nE/(1e-15f+E);

-       frame_loudness += celt_sqrt(E+1e-10f);

+       frame_loudness += sqrt(E+1e-10f);

        logE[b] = (float)log(E+1e-10f);

        tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);

        tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);

@@ -343,21 +347,21 @@

           tonal->highE[b]+=.5f;

           tonal->lowE[b]-=.5f;

-       relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]);

+       relativeE += (logE[b]-tonal->lowE[b])/(1e-15+tonal->highE[b]-tonal->lowE[b]);

        L1=L2=0;

        for (i=0;i<NB_FRAMES;i++)

-          L1 += celt_sqrt(tonal->E[i][b]);

+          L1 += sqrt(tonal->E[i][b]);

           L2 += tonal->E[i][b];

-       stationarity = MIN16(0.99f,L1/celt_sqrt(EPSILON+NB_FRAMES*L2));

+       stationarity = MIN16(0.99f,L1/sqrt(1e-15+NB_FRAMES*L2));

        stationarity *= stationarity;

        stationarity *= stationarity;

        frame_stationarity += stationarity;

        /*band_tonality[b] = tE/(1e-15+E)*/;

-       band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);

+       band_tonality[b] = MAX16(tE/(1e-15+E), stationarity*tonal->prev_band_tonality[b]);

 #if 0

        if (b>=NB_TONAL_SKIP_BANDS)

@@ -379,6 +383,9 @@

     bandwidth = 0;

     maxE = 0;

     noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));

+#ifdef FIXED_POINT

+    noise_floor *= 1<<(15+SIG_SHIFT);

+#endif

     noise_floor *= noise_floor;

     for (b=0;b<NB_TOT_BANDS;b++)

@@ -389,8 +396,8 @@

        band_end = extra_bands[b+1];

        for (i=band_start;i<band_end;i++)

-          float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r

-                     + out[i].i*out[i].i + out[N-i].i*out[N-i].i;

+          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r

+                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;

           E += binE;

        maxE = MAX32(maxE, E);

@@ -469,7 +476,7 @@

        tonal->mem[i] = BFCC[i];

     for (i=0;i<9;i++)

-       features[11+i] = celt_sqrt(tonal->std[i]);

+       features[11+i] = sqrt(tonal->std[i]);

     features[20] = info->tonality;

     features[21] = info->activity;

     features[22] = frame_stationarity;

@@ -476,7 +483,7 @@

     features[23] = info->tonality_slope;

     features[24] = tonal->lowECount;

-#ifndef FIXED_POINT

+#ifndef DISABLE_FLOAT_API

     mlp_process(&net, features, frame_probs);

     frame_probs[0] = .5f*(frame_probs[0]+1);

     /* Curve fitting between the MLP probability and the actual probability */

@@ -611,44 +618,30 @@

     RESTORE_STACK;

-int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,

-                        const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,

-                        int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)

+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,

+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,

+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)

    int offset;

    int pcm_len;

-   /* Avoid overflow/wrap-around of the analysis buffer */

-   frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size);

+   if (analysis_pcm != NULL)

+   {

+      /* Avoid overflow/wrap-around of the analysis buffer */

+      analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size);

-   pcm_len = frame_size - analysis->analysis_offset;

-   offset = 0;

-   do {

-      tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix);

-      offset += 480;

-      pcm_len -= 480;

-   } while (pcm_len>0);

-   analysis->analysis_offset = frame_size;

+      pcm_len = analysis_frame_size - analysis->analysis_offset;

+      offset = analysis->analysis_offset;

+      do {

+         tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);

+         offset += 480;

+         pcm_len -= 480;

+      } while (pcm_len>0);

+      analysis->analysis_offset = analysis_frame_size;

-   if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)

-   {

-      int LM = 3;

-      LM = optimize_framesize((const opus_val16*)pcm, frame_size, C, Fs, bitrate_bps,

-            analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix);

-      while ((Fs/400<<LM)>frame_size)

-         LM--;

-      frame_size = (Fs/400<<LM);

-   } else {

-      frame_size = frame_size_select(frame_size, variable_duration, Fs);

+      analysis->analysis_offset -= frame_size;

-   if (frame_size<0)

-      return -1;

-   analysis->analysis_offset -= frame_size;

-   /* Only perform analysis up to 20-ms frames. Longer ones will be split if

-      they're in CELT-only mode. */

    analysis_info->valid = 0;

    tonality_get_info(analysis, analysis_info, frame_size);

-   return frame_size;

--- a/src/analysis.h

+++ b/src/analysis.h

@@ -42,7 +42,7 @@

    float angle[240];

    float d_angle[240];

    float d2_angle[240];

-   float inmem[ANALYSIS_BUF_SIZE];

+   opus_val32 inmem[ANALYSIS_BUF_SIZE];

    int   mem_fill;                      /* number of usable samples in the buffer */

    float prev_band_tonality[NB_TBANDS];

    float prev_tonality;

@@ -79,12 +79,12 @@

 } TonalityAnalysisState;

 void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,

-     const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix);

+     const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix);

 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len);

-int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,

-                        const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,

-                        int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);

+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,

+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,

+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);

 #endif

--- a/src/mlp.c

+++ b/src/mlp.c

@@ -35,7 +35,7 @@

 #include "tansig_table.h"

 #define MAX_NEURONS 100

-#ifdef FIXED_POINT

+#if 0

 static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */

 	int i;

@@ -43,9 +43,9 @@

 	/*double x, y;*/

 	opus_val16 dy, yy; /* Q14 */

 	/*x = 1.9073e-06*_x;*/

-	if (_x>=QCONST32(10,19))

+	if (_x>=QCONST32(8,19))

 		return QCONST32(1.,14);

-	if (_x<=-QCONST32(10,19))

+	if (_x<=-QCONST32(8,19))

 		return -QCONST32(1.,14);

 	xx = EXTRACT16(SHR32(_x, 8));

 	/*i = lrint(25*x);*/

@@ -62,11 +62,11 @@

 #else

 /*extern const float tansig_table[501];*/

-static inline opus_val16 tansig_approx(opus_val16 x)

+static inline float tansig_approx(float x)

 	int i;

-	opus_val16 y, dy;

-	opus_val16 sign=1;

+	float y, dy;

+	float sign=1;

     if (x>=8)

         return 1;

     if (x<=-8)

@@ -85,6 +85,7 @@

 #endif

+#if 0

 void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)

 	int j;

@@ -108,4 +109,28 @@

 		out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));

+#else

+/* Evaluate a network with a single hidden layer:
+   in[] (topo[0] values) -> hidden (topo[1] neurons) -> out[] (topo[2] values).
+   For every neuron, m->weights supplies one bias followed by one weight per
+   input of that layer; the accumulated sum goes through tansig_approx(). */
+void mlp_process(const MLP *m, const float *in, float *out)
+{
+    const float *w = m->weights;
+    float hidden[MAX_NEURONS]; /* indexed up to topo[1]-1; no bound check is made here */
+    int n;
+    /* Hidden layer: bias first, then one weight per input sample */
+    for (n=0;n<m->topo[1];n++)
+    {
+        int i = 0;
+        float acc = *w++;
+        while (i<m->topo[0])
+        {
+            acc += in[i] * *w++;
+            i++;
+        }
+        hidden[n] = tansig_approx(acc);
+    }
+    /* Output layer: same layout, fed from the hidden activations */
+    for (n=0;n<m->topo[2];n++)
+    {
+        int i = 0;
+        float acc = *w++;
+        while (i<m->topo[1])
+        {
+            acc += hidden[i] * *w++;
+            i++;
+        }
+        out[n] = tansig_approx(acc);
+    }
+}

+#endif

--- a/src/mlp.h

+++ b/src/mlp.h

@@ -33,9 +33,9 @@

 typedef struct {

 	int layers;

 	const int *topo;

-	const opus_val16 *weights;

+	const float *weights;

 } MLP;

-void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out);

+void mlp_process(const MLP *m, const float *in, float *out);

 #endif /* _MLP_H_ */

--- a/src/opus_encoder.c

+++ b/src/opus_encoder.c

@@ -95,10 +95,10 @@

     int          silk_bw_switch;

     /* Sampling rate (at the API level) */

     int          first;

-    int          energy_masking;

+    opus_val16 * energy_masking;

     StereoWidthState width_mem;

     opus_val16   delay_buffer[MAX_ENCODER_BUFFER*2];

-#ifndef FIXED_POINT

+#ifndef DISABLE_FLOAT_API

     TonalityAnalysisState analysis;

     int          detected_bandwidth;

     int          analysis_offset;

@@ -201,7 +201,7 @@

     st->silk_mode.payloadSize_ms            = 20;

     st->silk_mode.bitRate                   = 25000;

     st->silk_mode.packetLossPercentage      = 0;

-    st->silk_mode.complexity                = 10;

+    st->silk_mode.complexity                = 9;

     st->silk_mode.useInBandFEC              = 0;

     st->silk_mode.useDTX                    = 0;

     st->silk_mode.useCBR                    = 0;

@@ -212,7 +212,7 @@

     if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR;

     celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0));

-    celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(10));

+    celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity));

     st->use_vbr = 1;

     /* Makes constrained VBR the default (safer for real-time use) */

@@ -551,7 +551,7 @@

     return st->user_bitrate_bps;

-#ifndef FIXED_POINT

+#ifndef DISABLE_FLOAT_API

 /* Don't use more than 60 ms for the frame size analysis */

 #define MAX_DYNAMIC_FRAMESIZE 24

 /* Estimates how much the bitrate will be boosted based on the sub-frame energy */

@@ -685,32 +685,6 @@

    return best_state;

-void downmix_float(const void *_x, float *sub, int subframe, int offset, int C)

-{

-   const float *x;

-   int c, j;

-   x = (const float *)_x;

-   for (j=0;j<subframe;j++)

-      sub[j] = x[(j+offset)*C];

-   for (c=1;c<C;c++)

-      for (j=0;j<subframe;j++)

-         sub[j] += x[(j+offset)*C+c];

-}

-void downmix_int(const void *_x, float *sub, int subframe, int offset, int C)

-{

-   const opus_int16 *x;

-   int c, j;

-   x = (const opus_int16 *)_x;

-   for (j=0;j<subframe;j++)

-      sub[j] = x[(j+offset)*C];

-   for (c=1;c<C;c++)

-      for (j=0;j<subframe;j++)

-         sub[j] += x[(j+offset)*C+c];

-   for (j=0;j<subframe;j++)

-      sub[j] *= (1.f/32768);

-}

 int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,

                 int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,

                 downmix_func downmix)

@@ -723,10 +697,10 @@

    int bestLM=0;

    int subframe;

    int pos;

-   VARDECL(opus_val16, sub);

+   VARDECL(opus_val32, sub);

    subframe = Fs/400;

-   ALLOC(sub, subframe, opus_val16);

+   ALLOC(sub, subframe, opus_val32);

    e[0]=mem[0];

    e_1[0]=1.f/(EPSILON+mem[0]);

    if (buffering)

@@ -754,7 +728,7 @@

       int j;

       tmp=EPSILON;

-      downmix(x, sub, subframe, i*subframe, C);

+      downmix(x, sub, subframe, i*subframe, 0, -2, C);

       if (i==0)

          memx = sub[0];

       for (j=0;j<subframe;j++)

@@ -784,6 +758,76 @@

 #endif

+#ifndef DISABLE_FLOAT_API

+/* Downmix interleaved float PCM (C channels per frame) into sub[0..subframe-1],
+   starting at frame `offset`.
+   - channel c1 is always taken;
+   - c2 > -1 adds that second channel;
+   - c2 == -2 adds every remaining channel 1..C-1;
+   - any other c2 leaves c1 on its own.
+   Each sample goes through SCALEIN() before summing.  The sum is then scaled:
+   divided by C for the all-channel mix and by 2 otherwise, on top of a base
+   gain of 1<<SIG_SHIFT in FIXED_POINT builds (1.0 otherwise). */
+void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+{
+   const float *x;
+   opus_val32 scale;
+   int j;
+   x = (const float *)_x;
+   for (j=0;j<subframe;j++)
+      sub[j] = SCALEIN(x[(j+offset)*C+c1]);
+   if (c2>-1)
+   {
+      for (j=0;j<subframe;j++)
+         sub[j] += SCALEIN(x[(j+offset)*C+c2]);
+   } else if (c2==-2)
+   {
+      int c;
+      for (c=1;c<C;c++)
+      {
+         for (j=0;j<subframe;j++)
+            sub[j] += SCALEIN(x[(j+offset)*C+c]);
+      }
+   }
+#ifdef FIXED_POINT
+   scale = (1<<SIG_SHIFT);
+#else
+   scale = 1.f;
+#endif
+   /* The all-channel request is signalled through c2, not C: C is the
+      interleaving stride and loop bound above, so testing C against -2
+      could never select the per-channel normalisation. */
+   if (c2==-2)
+      scale /= C;
+   else
+      scale /= 2;
+   for (j=0;j<subframe;j++)
+      sub[j] *= scale;
+}

+#endif

+/* Downmix interleaved 16-bit PCM (C channels per frame) into
+   sub[0..subframe-1], starting at frame `offset`.
+   - channel c1 is always taken;
+   - c2 > -1 adds that second channel;
+   - c2 == -2 adds every remaining channel 1..C-1;
+   - any other c2 leaves c1 on its own.
+   The sum is then scaled: divided by C for the all-channel mix and by 2
+   otherwise, on top of a base gain of 1<<SIG_SHIFT in FIXED_POINT builds
+   and 1/32768 in float builds. */
+void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+{
+   const opus_int16 *x;
+   opus_val32 scale;
+   int j;
+   x = (const opus_int16 *)_x;
+   for (j=0;j<subframe;j++)
+      sub[j] = x[(j+offset)*C+c1];
+   if (c2>-1)
+   {
+      for (j=0;j<subframe;j++)
+         sub[j] += x[(j+offset)*C+c2];
+   } else if (c2==-2)
+   {
+      int c;
+      for (c=1;c<C;c++)
+      {
+         for (j=0;j<subframe;j++)
+            sub[j] += x[(j+offset)*C+c];
+      }
+   }
+#ifdef FIXED_POINT
+   scale = (1<<SIG_SHIFT);
+#else
+   scale = 1.f/32768;
+#endif
+   /* The all-channel request is signalled through c2, not C: C is the
+      interleaving stride and loop bound above, so testing C against -2
+      could never select the per-channel normalisation. */
+   if (c2==-2)
+      scale /= C;
+   else
+      scale /= 2;
+   for (j=0;j<subframe;j++)
+      sub[j] *= scale;
+}

 opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)

    int new_size;

@@ -805,6 +849,29 @@

    return new_size;

+/* Pick the frame size to use for this call.
+   When the float API is compiled in and variable duration is requested with
+   at least Fs/200 samples available, optimize_framesize() proposes a shift LM
+   which is lowered until (Fs/400)<<LM fits in frame_size.  In every other
+   case the decision is delegated to frame_size_select().
+   Returns the chosen size, or -1 when that size is negative. */
+opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
+      int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+      int delay_compensation, downmix_func downmix, opus_val32 *subframe_mem)
+{
+#ifndef DISABLE_FLOAT_API
+   if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
+   {
+      int shift;
+      shift = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps,
+            0, subframe_mem, delay_compensation, downmix);
+      /* Never ask for more samples than the caller handed in */
+      for (; (Fs/400<<shift)>frame_size; shift--)
+         ;
+      frame_size = (Fs/400<<shift);
+      return frame_size<0 ? -1 : frame_size;
+   }
+#endif
+   frame_size = frame_size_select(frame_size, variable_duration, Fs);
+   return frame_size<0 ? -1 : frame_size;
+}

 opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem)

    opus_val16 corr;

@@ -883,11 +950,8 @@

 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,

-                unsigned char *data, opus_int32 out_data_bytes, int lsb_depth

-#ifndef FIXED_POINT

-                , AnalysisInfo *analysis_info

-#endif

-                )

+                unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,

+                const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix)

     void *silk_enc;

     CELTEncoder *celt_enc;

@@ -916,6 +980,10 @@

     opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */

     int total_buffer;

     opus_val16 stereo_width;

+    const CELTMode *celt_mode;

+    AnalysisInfo analysis_info;

+    int analysis_read_pos_bak=-1;

+    int analysis_read_subframe_bak=-1;

     VARDECL(opus_val16, tmp_prefill);

     ALLOC_STACK;

@@ -941,17 +1009,34 @@

     lsb_depth = IMIN(lsb_depth, st->lsb_depth);

+    analysis_info.valid = 0;

+    celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));

+#ifndef DISABLE_FLOAT_API

+#ifdef FIXED_POINT

+    if (st->silk_mode.complexity >= 10 && st->Fs==48000)

+#else

+    if (st->silk_mode.complexity >= 7 && st->Fs==48000)

+#endif

+    {

+       analysis_read_pos_bak = st->analysis.read_pos;

+       analysis_read_subframe_bak = st->analysis.read_subframe;

+       run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size,

+             c1, c2, analysis_channels, st->Fs,

+             lsb_depth, downmix, &analysis_info);

+    }

+#endif

     st->voice_ratio = -1;

-#ifndef FIXED_POINT

+#ifndef DISABLE_FLOAT_API

     st->detected_bandwidth = 0;

-    if (analysis_info->valid)

+    if (analysis_info.valid)

        int analysis_bandwidth;

        if (st->signal_type == OPUS_AUTO)

-          st->voice_ratio = (int)floor(.5+100*(1-analysis_info->music_prob));

+          st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));

-       analysis_bandwidth = analysis_info->bandwidth;

+       analysis_bandwidth = analysis_info.bandwidth;

        if (analysis_bandwidth<=12)

           st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND;

        else if (analysis_bandwidth<=14)

@@ -1281,6 +1366,11 @@

        VARDECL(OpusRepacketizer, rp);

        opus_int32 bytes_per_frame;

+       if (analysis_read_pos_bak!= -1)

+       {

+          st->analysis.read_pos = analysis_read_pos_bak;

+          st->analysis.read_subframe = analysis_read_subframe_bak;

+       }

        nb_frames = frame_size > st->Fs/25 ? 3 : 2;

        bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames);

@@ -1310,11 +1400,9 @@

           /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */

           if (to_celt && i==nb_frames-1)

              st->user_forced_mode = MODE_CELT_ONLY;

-          tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth

-#ifndef FIXED_POINT

-                , analysis_info

-#endif

-                );

+          tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50,

+                tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth,

+                NULL, 0, c1, c2, analysis_channels, downmix);

           if (tmp_len<0)

              RESTORE_STACK;

@@ -1419,6 +1507,45 @@

             st->silk_mode.bitRate = total_bitRate;

+        /* Surround masking for SILK */

+        if (st->energy_masking && st->use_vbr && !st->lfe)

+        {

+           opus_val32 mask_sum=0;

+           opus_val16 masking_depth;

+           opus_int32 rate_offset;

+           int c;

+           int end = 17;

+           opus_int16 srate = 16000;

+           if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND)

+           {

+              end = 13;

+              srate = 8000;

+           } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)

+           {

+              end = 15;

+              srate = 12000;

+           }

+           for (c=0;c<st->channels;c++)

+           {

+              for(i=0;i<end;i++)

+              {

+                 opus_val16 mask;

+                 mask = MAX16(MIN16(st->energy_masking[21*c+i],

+                        QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));

+                 if (mask > 0)

+                    mask = HALF16(mask);

+                 mask_sum += mask;

+              }

+           }

+           /* Conservative rate reduction, we cut the masking in half */

+           masking_depth = HALF16(mask_sum / end*st->channels);

+           rate_offset = PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT);

+           rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3);

+           rate_offset += QCONST16(.4f, DB_SHIFT);

+           st->silk_mode.bitRate += rate_offset;

+           bytes_target += rate_offset * frame_size / (8 * st->Fs);

+        }

         st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs;

         st->silk_mode.nChannelsAPI = st->channels;

         st->silk_mode.nChannelsInternal = st->stream_channels;

@@ -1476,9 +1603,7 @@

         if (prefill)

             opus_int32 zero=0;

-            const CELTMode *celt_mode;

             int prefill_offset;

-            celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));

             /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode

                a discontinuity. The exact location is what we need to avoid leaving any "gap"

                in the audio when mixing with the redundant CELT frame. Here we can afford to

@@ -1589,12 +1714,12 @@

             if (st->use_vbr)

                 opus_int32 bonus=0;

-#ifndef FIXED_POINT

+#ifndef DISABLE_FLOAT_API

                 if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50)

                    bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50);

-                   if (analysis_info->valid)

-                      bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info->tonality));

+                   if (analysis_info.valid)

+                      bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality));

 #endif

                 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));

@@ -1625,9 +1750,6 @@

     /* gain_fade() and stereo_fade() need to be after the buffer copying

        because we don't want any of this to affect the SILK part */

     if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) {

-       const CELTMode *celt_mode;

-       celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));

        gain_fade(pcm_buf, pcm_buf,

              st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs);

@@ -1638,9 +1760,6 @@

         /* Apply stereo width reduction (at low bitrates) */

         if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) {

             opus_val16 g1, g2;

-            const CELTMode *celt_mode;

-            celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));

             g1 = st->hybrid_stereo_width_Q14;

             g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14);

 #ifdef FIXED_POINT

@@ -1697,9 +1816,9 @@

        ec_enc_shrink(&enc, nb_compr_bytes);

-#ifndef FIXED_POINT

+#ifndef DISABLE_FLOAT_API

     if (redundancy || st->mode != MODE_SILK_ONLY)

-       celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(analysis_info));

+       celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));

 #endif

     /* 5 ms redundant frame for CELT->SILK */

@@ -1825,114 +1944,88 @@

 #ifdef FIXED_POINT

 #ifndef DISABLE_FLOAT_API

-opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,

+opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,

       unsigned char *data, opus_int32 max_data_bytes)

    int i, ret;

+   int frame_size;

+   int delay_compensation;

    VARDECL(opus_int16, in);

    ALLOC_STACK;

-   frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);

-   if(frame_size<0)

-   {

-      RESTORE_STACK;

-      return OPUS_BAD_ARG;

-   }

+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)

+      delay_compensation = 0;

+   else

+      delay_compensation = st->delay_compensation;

+   frame_size = compute_frame_size(pcm, analysis_frame_size,

+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,

+         delay_compensation, downmix_float, st->analysis.subframe_mem);

    ALLOC(in, frame_size*st->channels, opus_int16);

    for (i=0;i<frame_size*st->channels;i++)

       in[i] = FLOAT2INT16(pcm[i]);

-   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16);

+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);

    RESTORE_STACK;

    return ret;

 #endif

-opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size,

+opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,

                 unsigned char *data, opus_int32 out_data_bytes)

-   frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);

-   if(frame_size<0)

-   {

-      return OPUS_BAD_ARG;

-   }

-   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16);

+   int frame_size; /* frame size actually encoded, chosen by compute_frame_size() */

+   int delay_compensation;

+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)

+      delay_compensation = 0;

+   else

+      delay_compensation = st->delay_compensation;

+   frame_size = compute_frame_size(pcm, analysis_frame_size,

+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,

+         delay_compensation, downmix_int, st->analysis.subframe_mem); /* pcm holds opus_int16 samples, so the integer downmix must be used (same callback as the opus_encode_native() call below) */

+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);

 #else

-opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size,

+opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,

       unsigned char *data, opus_int32 max_data_bytes)

    int i, ret;

-   const CELTMode *celt_mode;

+   int frame_size; /* frame size actually encoded, chosen by compute_frame_size() */

    int delay_compensation;

-   int lsb_depth;

    VARDECL(float, in);

-   AnalysisInfo analysis_info;

    ALLOC_STACK;

-   opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode));

    if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)

       delay_compensation = 0;

    else

       delay_compensation = st->delay_compensation;

+   frame_size = compute_frame_size(pcm, analysis_frame_size,

+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,

+         delay_compensation, downmix_int, st->analysis.subframe_mem); /* pcm is still the caller's opus_int16 buffer here (the float copy is made below), so the integer downmix must be used */

-   lsb_depth = IMIN(16, st->lsb_depth);

-   analysis_info.valid = 0;

-   if (st->silk_mode.complexity >= 7 && st->Fs==48000)

-   {

-      frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset,

-            frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_int, &analysis_info);

-   } else {

-      frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);

-   }

-   if(frame_size<0)

-   {

-      RESTORE_STACK;

-      return OPUS_BAD_ARG;

-   }

    ALLOC(in, frame_size*st->channels, float);

    for (i=0;i<frame_size*st->channels;i++)

       in[i] = (1.0f/32768)*pcm[i];

-   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, &analysis_info);

+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);

    RESTORE_STACK;

    return ret;

-opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,

+opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,

                       unsigned char *data, opus_int32 out_data_bytes)

-   const CELTMode *celt_mode;

+   int frame_size;

    int delay_compensation;

-   int lsb_depth;

-   AnalysisInfo analysis_info;

-   opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode));

    if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)

       delay_compensation = 0;

    else

       delay_compensation = st->delay_compensation;

-   lsb_depth = IMIN(24, st->lsb_depth);

-   analysis_info.valid = 0;

-   if (st->silk_mode.complexity >= 7 && st->Fs==48000)

-   {

-      frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset,

-            frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_float, &analysis_info);

-   } else {

-      frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);

-   }

-   if(frame_size<0)

-   {

-      return OPUS_BAD_ARG;

-   }

-   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, &analysis_info);

+   frame_size = compute_frame_size(pcm, analysis_frame_size,

+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,

+         delay_compensation, downmix_float, st->analysis.subframe_mem);

+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24,

+                             pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);

 #endif

@@ -2349,20 +2442,10 @@

             ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value));

         break;

-        case OPUS_SET_ENERGY_SAVE_REQUEST:

-        {

-            opus_val16 *value = va_arg(ap, opus_val16*);

-            if (!value)

-            {

-               goto bad_arg;

-            }

-            ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_SAVE(value));

-        }

-        break;

         case OPUS_SET_ENERGY_MASK_REQUEST:

             opus_val16 *value = va_arg(ap, opus_val16*);

-            st->energy_masking = (value!=NULL);

+            st->energy_masking = value;

             ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value));

         break;

--- a/src/opus_multistream_encoder.c

+++ b/src/opus_multistream_encoder.c

@@ -36,8 +36,11 @@

 #include <stdarg.h>

 #include "float_cast.h"

 #include "os_support.h"

-#include "analysis.h"

 #include "mathops.h"

+#include "mdct.h"

+#include "modes.h"

+#include "bands.h"

+#include "quant_bands.h"

 typedef struct {

    int nb_streams;

@@ -57,18 +60,66 @@

       {5, 3, {0, 6, 1, 2, 3, 4, 5, 7}}, /* 8: 7.1 surround */

};

+typedef void (*opus_copy_channel_in_func)( /* callback that extracts one channel of the caller's PCM into an opus_val16 buffer */

+  opus_val16 *dst, /* destination; sample i is written at dst[i*dst_stride] (as in the float implementation in this file) */

+  int dst_stride,

+  const void *src, /* caller's PCM; element type depends on the implementation (float or short variants exist) */

+  int src_stride, /* callers in this file pass the total channel count */

+  int src_channel, /* index of the channel to extract */

+  int frame_size /* number of samples to copy for that channel */

+);

 struct OpusMSEncoder {

-   TonalityAnalysisState analysis;

    ChannelLayout layout;

    int lfe_stream;

+   int application;

    int variable_duration;

    int surround;

    opus_int32 bitrate_bps;

    opus_val32 subframe_mem[3];

    /* Encoder states go here */

+   /* then opus_val32 window_mem[channels*120]; */

+   /* then opus_val32 preemph_mem[channels]; */

};

+static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st) /* returns the address just past all per-stream encoder states and past nb_channels*120 opus_val32 values */

+{

+   int s;

+   char *ptr;

+   int coupled_size, mono_size;

+   coupled_size = opus_encoder_get_size(2);

+   mono_size = opus_encoder_get_size(1);

+   ptr = (char*)st + align(sizeof(OpusMSEncoder)); /* start right after the aligned multistream header */

+   for (s=0;s<st->layout.nb_streams;s++) /* step over each stream's encoder state; coupled streams come first */

+   {

+      if (s < st->layout.nb_coupled_streams)

+         ptr += align(coupled_size);

+      else

+         ptr += align(mono_size);

+   }

+   return (opus_val32*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32)); /* skip the window memory (120 values per channel) that precedes the pre-emphasis memory */

+}

+static opus_val32 *ms_get_window_mem(OpusMSEncoder *st) /* returns the address just past all per-stream encoder states */

+{

+   int s;

+   char *ptr;

+   int coupled_size, mono_size;

+   coupled_size = opus_encoder_get_size(2);

+   mono_size = opus_encoder_get_size(1);

+   ptr = (char*)st + align(sizeof(OpusMSEncoder)); /* start right after the aligned multistream header */

+   for (s=0;s<st->layout.nb_streams;s++) /* step over each stream's encoder state; coupled streams come first */

+   {

+      if (s < st->layout.nb_coupled_streams)

+         ptr += align(coupled_size);

+      else

+         ptr += align(mono_size);

+   }

+   return (opus_val32*)ptr;

+}

 static int validate_encoder_layout(const ChannelLayout *layout)

    int s;

@@ -88,7 +139,207 @@

    return 1;

+static void channel_pos(int channels, int pos[8]) /* fills pos[] with a mix position per channel; pos is left unmodified for channel counts other than 3..8 */

+{

+   /* Position in the mix: 0: don't mix, 1: left, 2: center, 3: right */

+   if (channels==4)

+   {

+      pos[0]=1;

+      pos[1]=3;

+      pos[2]=1;

+      pos[3]=3;

+   } else if (channels==3||channels==5||channels==6)

+   {

+      pos[0]=1;

+      pos[1]=2;

+      pos[2]=3;

+      pos[3]=1;

+      pos[4]=3;

+      pos[5]=0; /* written even for 3 and 5 channels; the pos array always has 8 entries */

+   } else if (channels==7)

+   {

+      pos[0]=1;

+      pos[1]=2;

+      pos[2]=3;

+      pos[3]=1;

+      pos[4]=3;

+      pos[5]=2;

+      pos[6]=0;

+   } else if (channels==8)

+   {

+      pos[0]=1;

+      pos[1]=2;

+      pos[2]=3;

+      pos[3]=1;

+      pos[4]=3;

+      pos[5]=1;

+      pos[6]=3;

+      pos[7]=0;

+   }

+}

+#if 1

+/* Computes a rough approximation of log2(4^a + 4^b)/2 (the exact form is the #else variant below) */

+static opus_val16 logSum(opus_val16 a, opus_val16 b)

+{

+   opus_val16 max;

+   opus_val32 diff;

+   opus_val16 frac;

+   static const opus_val16 diff_table[17] = { /* correction added to max(a,b), sampled at |a-b| = 0, 0.5, ..., 4; the 8 entries not listed are implicitly zero */

+         QCONST16(0.5000000f, DB_SHIFT), QCONST16(0.2924813f, DB_SHIFT), QCONST16(0.1609640f, DB_SHIFT), QCONST16(0.0849625f, DB_SHIFT),

+         QCONST16(0.0437314f, DB_SHIFT), QCONST16(0.0221971f, DB_SHIFT), QCONST16(0.0111839f, DB_SHIFT), QCONST16(0.0056136f, DB_SHIFT),

+         QCONST16(0.0028123f, DB_SHIFT)

+   };

+   int low;

+   if (a>b)

+   {

+      max = a;

+      diff = SUB32(EXTEND32(a),EXTEND32(b));

+   } else {

+      max = b;

+      diff = SUB32(EXTEND32(b),EXTEND32(a));

+   }

+   if (diff >= QCONST16(8.f, DB_SHIFT)) /* smaller operand is treated as negligible; also keeps low+1 within the 17-entry table */

+      return max;

+#ifdef FIXED_POINT

+   low = SHR32(diff, DB_SHIFT-1); /* table index: diff in steps of 0.5 */

+   frac = SHL16(diff - SHL16(low, DB_SHIFT-1), 16-DB_SHIFT);

+#else

+   low = floor(2*diff); /* table index: diff in steps of 0.5 */

+   frac = 2*diff - low;

+#endif

+   return max + diff_table[low] + MULT16_16_Q15(frac, SUB16(diff_table[low+1], diff_table[low])); /* linear interpolation between adjacent table entries */

+}

+#else

+opus_val16 logSum(opus_val16 a, opus_val16 b) /* exact reference version; disabled by the #if 1 above */

+{

+   return log2(pow(4, a)+ pow(4, b))/2;

+}

+#endif

+void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem,

+      int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in

+) /* Writes 21 values per channel into bandLogE: each channel's spread band log-energy minus the mask for its position (0 for unmixed channels). mem (overlap values per channel) and preemph_mem (one value per channel) carry state between calls. */

+{

+   int c;

+   int i;

+   int LM;

+   int pos[8] = {0};

+   int upsample;

+   int frame_size;

+   opus_val32 bandE[21];

+   opus_val16 maskLogE[3][21]; /* rows: 0 = left, 1 = centre, 2 = right (indexed with pos[c]-1 below) */

+   VARDECL(opus_val32, in);

+   VARDECL(opus_val16, x);

+   VARDECL(opus_val32, freq); /* name must match the ALLOC(freq, ...) below */

+   SAVE_STACK;

+   upsample = resampling_factor(rate);

+   frame_size = len*upsample;

+   for (LM=0;LM<=celt_mode->maxLM;LM++) /* find the LM whose MDCT size equals the upsampled frame size */

+      if (celt_mode->shortMdctSize<<LM==frame_size)

+         break;

+   ALLOC(in, frame_size+overlap, opus_val32);

+   ALLOC(x, len, opus_val16);

+   ALLOC(freq, frame_size, opus_val32);

+   channel_pos(channels, pos);

+   for (c=0;c<3;c++)

+      for (i=0;i<21;i++)

+         maskLogE[c][i] = -QCONST16(28.f, DB_SHIFT);

+   for (c=0;c<channels;c++)

+   {

+      OPUS_COPY(in, mem+c*overlap, overlap); /* restore this channel's overlap history */

+      (*copy_channel_in)(x, 1, pcm, channels, c, len);

+      preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0);

+      clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1);

+      if (upsample != 1)

+      {

+         int bound = len;

+         for (i=0;i<bound;i++)

+            freq[i] *= upsample;

+         for (;i<frame_size;i++)

+            freq[i] = 0;

+      }

+      compute_band_energies(celt_mode, freq, bandE, 21, 1, 1<<LM);

+      amp2Log2(celt_mode, 21, 21, bandE, bandLogE+21*c, 1);

+      /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. */

+      for (i=1;i<21;i++)

+         bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i-1]-QCONST16(1.f, DB_SHIFT));

+      for (i=19;i>=0;i--)

+         bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i+1]-QCONST16(2.f, DB_SHIFT));

+      if (pos[c]==1)

+      {

+         for (i=0;i<21;i++)

+            maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]);

+      } else if (pos[c]==3)

+      {

+         for (i=0;i<21;i++)

+            maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]);

+      } else if (pos[c]==2)

+      {

+         for (i=0;i<21;i++)

+         {

+            maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT));

+            maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT));

+         }

+      }

+#if 0

+      for (i=0;i<21;i++)

+         printf("%f ", bandLogE[21*c+i]);

+//#else

+      float sum=0;

+      for (i=0;i<21;i++)

+         sum += bandLogE[21*c+i];

+      printf("%f ", sum/21);

+#endif

+      OPUS_COPY(mem+c*overlap, in+frame_size, overlap); /* save the tail of the input as the next call's overlap history */

+   }

+   for (i=0;i<21;i++)

+      maskLogE[1][i] = MIN32(maskLogE[0][i],maskLogE[2][i]); /* centre mask is the lower of the left and right masks */

+   for (c=0;c<3;c++)

+      for (i=0;i<21;i++)

+         maskLogE[c][i] += QCONST16(.5f, DB_SHIFT)*log2(2.f/(channels-1));

+#if 0

+   for (c=0;c<3;c++)

+   {

+      for (i=0;i<21;i++)

+         printf("%f ", maskLogE[c][i]);

+   }

+#endif

+   for (c=0;c<channels;c++)

+   {

+      opus_val16 *mask;

+      if (pos[c]!=0)

+      {

+         mask = &maskLogE[pos[c]-1][0];

+         for (i=0;i<21;i++)

+            bandLogE[21*c+i] = bandLogE[21*c+i] - mask[i]; /* band level relative to the mask for this channel's position */

+      } else {

+         for (i=0;i<21;i++)

+            bandLogE[21*c+i] = 0;

+      }

+#if 0

+      for (i=0;i<21;i++)

+         printf("%f ", bandLogE[21*c+i]);

+      printf("\n");

+#endif

+#if 0

+      float sum=0;

+      for (i=0;i<21;i++)

+         sum += bandLogE[21*c+i];

+      printf("%f ", sum/21);

+      printf("\n");

+#endif

+   }

+   RESTORE_STACK;

+}

 opus_int32 opus_multistream_encoder_get_size(int nb_streams, int nb_coupled_streams)

    int coupled_size;

@@ -132,7 +383,9 @@

       return 0;

    size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams);

    if (channels>2)

-      size += align(opus_encoder_get_size(2));

+   {

+      size += channels*(120*sizeof(opus_val32) + sizeof(opus_val32));

+   }

    return size;

@@ -161,10 +414,10 @@

    st->layout.nb_streams = streams;

    st->layout.nb_coupled_streams = coupled_streams;

    st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0;

-   OPUS_CLEAR(&st->analysis,1);

    if (!surround)

       st->lfe_stream = -1;

    st->bitrate_bps = OPUS_AUTO;

+   st->application = application;

    st->variable_duration = OPUS_FRAMESIZE_ARG;

    for (i=0;i<st->layout.nb_channels;i++)

       st->layout.mapping[i] = mapping[i];

@@ -192,10 +445,8 @@

    if (surround)

-      OpusEncoder *downmix_enc;

-      downmix_enc = (OpusEncoder*)ptr;

-      ret = opus_encoder_init(downmix_enc, Fs, 2, OPUS_APPLICATION_AUDIO);

-      if(ret!=OPUS_OK)return ret;

+      OPUS_CLEAR(ms_get_preemph_mem(st), channels);

+      OPUS_CLEAR(ms_get_window_mem(st), channels*120);

    st->surround = surround;

    return OPUS_OK;

@@ -339,22 +590,6 @@

    return st;

-typedef void (*opus_copy_channel_in_func)(

-  opus_val16 *dst,

-  int dst_stride,

-  const void *src,

-  int src_stride,

-  int src_channel,

-  int frame_size

-);

-typedef void (*opus_surround_downmix_funct)(

-  opus_val16 *dst,

-  const void *src,

-  int channels,

-  int frame_size

-);

 static void surround_rate_allocation(

       OpusMSEncoder *st,

       opus_int32 *rate,

@@ -433,15 +668,11 @@

     OpusMSEncoder *st,

     opus_copy_channel_in_func copy_channel_in,

     const void *pcm,

-    int frame_size,

+    int analysis_frame_size,

     unsigned char *data,

     opus_int32 max_data_bytes,

     int lsb_depth,

-    opus_surround_downmix_funct surround_downmix

-#ifndef FIXED_POINT

-    , downmix_func downmix

-    , const void *pcm_analysis

-#endif

+    downmix_func downmix

    opus_int32 Fs;

@@ -451,32 +682,30 @@

    char *ptr;

    int tot_size;

    VARDECL(opus_val16, buf);

+   VARDECL(opus_val16, bandSMR);

    unsigned char tmp_data[MS_FRAME_TMP];

    OpusRepacketizer rp;

    opus_int32 complexity;

-#ifndef FIXED_POINT

-   AnalysisInfo analysis_info;

-#endif

    const CELTMode *celt_mode;

    opus_int32 bitrates[256];

    opus_val16 bandLogE[42];

-   opus_val16 bandLogE_mono[21];

+   opus_val32 *mem = NULL;

+   opus_val32 *preemph_mem=NULL;

+   int frame_size;

    ALLOC_STACK;

+   if (st->surround)

+   {

+      preemph_mem = ms_get_preemph_mem(st);

+      mem = ms_get_window_mem(st);

+   }

    ptr = (char*)st + align(sizeof(OpusMSEncoder));

    opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));

    opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_COMPLEXITY(&complexity));

    opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode));

-   if (400*frame_size < Fs)

-      RESTORE_STACK;

-      return OPUS_BAD_ARG;

-   }

-#ifndef FIXED_POINT

-   analysis_info.valid = 0;

-   if (complexity >= 7 && Fs==48000)

-   {

       opus_int32 delay_compensation;

       int channels;

@@ -483,13 +712,15 @@

       channels = st->layout.nb_streams + st->layout.nb_coupled_streams;

       opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation));

       delay_compensation -= Fs/400;

+      frame_size = compute_frame_size(pcm, analysis_frame_size,

+            st->variable_duration, channels, Fs, st->bitrate_bps,

+            delay_compensation, downmix, st->subframe_mem);

+   }

-      frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm_analysis,

-            frame_size, st->variable_duration, channels, Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix, &analysis_info);

-   } else

-#endif

+   if (400*frame_size < Fs)

-      frame_size = frame_size_select(frame_size, st->variable_duration, Fs);

+      RESTORE_STACK;

+      return OPUS_BAD_ARG;

    /* Validate frame_size before using it to allocate stack space.

       This mirrors the checks in opus_encode[_float](). */

@@ -504,42 +735,10 @@

    coupled_size = opus_encoder_get_size(2);

    mono_size = opus_encoder_get_size(1);

+   ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16);

    if (st->surround)

-      int i;

-      unsigned char dummy[512];

-      /* Temporary kludge -- remove */

-      OpusEncoder *downmix_enc;

-      ptr = (char*)st + align(sizeof(OpusMSEncoder));

-      for (s=0;s<st->layout.nb_streams;s++)

-      {

-         if (s < st->layout.nb_coupled_streams)

-            ptr += align(coupled_size);

-         else

-            ptr += align(mono_size);

-      }

-      downmix_enc = (OpusEncoder*)ptr;

-      surround_downmix(buf, pcm, st->layout.nb_channels, frame_size);

-      opus_encoder_ctl(downmix_enc, OPUS_SET_ENERGY_SAVE(bandLogE));

-      opus_encoder_ctl(downmix_enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));

-      opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));

-      opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_CHANNELS(2));

-      opus_encode_native(downmix_enc, buf, frame_size, dummy, 512, lsb_depth

-#ifndef FIXED_POINT

-            , &analysis_info

-#endif

-            );

-      /* Combines the left and right mask into a centre mask. We

-         use an approximation for the log of the sum of the energies. */

-      for(i=0;i<21;i++)

-      {

-         opus_val16 diff;

-         diff = ABS16(SUB16(bandLogE[i], bandLogE[21+i]));

-         diff = diff + HALF16(diff);

-         diff = SHR32(HALF32(celt_exp2(-diff)), 16-DB_SHIFT);

-         bandLogE_mono[i] = MAX16(bandLogE[i], bandLogE[21+i]) + diff;

-      }

+      surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in);

    if (max_data_bytes < 4*st->layout.nb_streams-1)

@@ -563,10 +762,24 @@

       opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s]));

       if (st->surround)

-         opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));

-         opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));

+         opus_int32 equiv_rate;

+         equiv_rate = st->bitrate_bps;

+         if (frame_size*50 < Fs)

+            equiv_rate -= 60*(Fs/frame_size - 50)*st->layout.nb_channels;

+         if (equiv_rate > 112000)

+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));

+         else if (equiv_rate > 76000)

+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND));

+         else if (equiv_rate > 48000)

+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND));

+         else

+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND));

          if (s < st->layout.nb_coupled_streams)

+         {

+            /* To preserve the spatial image, force stereo CELT on coupled streams */

+            opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));

             opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2));

+         }

@@ -578,11 +791,13 @@

       OpusEncoder *enc;

       int len;

       int curr_max;

+      int c1, c2;

       opus_repacketizer_init(&rp);

       enc = (OpusEncoder*)ptr;

       if (s < st->layout.nb_coupled_streams)

+         int i;

          int left, right;

          left = get_left_channel(&st->layout, s, -1);

          right = get_right_channel(&st->layout, s, -1);

@@ -591,28 +806,39 @@

          (*copy_channel_in)(buf+1, 2,

             pcm, st->layout.nb_channels, right, frame_size);

          ptr += align(coupled_size);

-         /* FIXME: This isn't correct for the coupled center channels in

-            6.1 surround configuration */

          if (st->surround)

-            opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE));

+         {

+            for (i=0;i<21;i++)

+            {

+               bandLogE[i] = bandSMR[21*left+i];

+               bandLogE[21+i] = bandSMR[21*right+i];

+            }

+         }

+         c1 = left;

+         c2 = right;

       } else {

+         int i;

          int chan = get_mono_channel(&st->layout, s, -1);

          (*copy_channel_in)(buf, 1,

             pcm, st->layout.nb_channels, chan, frame_size);

          ptr += align(mono_size);

          if (st->surround)

-            opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE_mono));

+         {

+            for (i=0;i<21;i++)

+               bandLogE[i] = bandSMR[21*chan+i];

+         }

+         c1 = chan;

+         c2 = -1;

+      if (st->surround)

+         opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE));

       /* number of bytes left (+Toc) */

       curr_max = max_data_bytes - tot_size;

       /* Reserve three bytes for the last stream and four for the others */

       curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1);

       curr_max = IMIN(curr_max,MS_FRAME_TMP);

-      len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth

-#ifndef FIXED_POINT

-            , &analysis_info

-#endif

-            );

+      len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth,

+            pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix);

       if (len<0)

          RESTORE_STACK;

@@ -626,50 +852,12 @@

       data += len;

       tot_size += len;

+   /*printf("\n");*/

    RESTORE_STACK;

    return tot_size;

-static void channel_pos(int channels, int pos[8])

-{

-   /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */

-   if (channels==4)

-   {

-      pos[0]=1;

-      pos[1]=3;

-      pos[2]=1;

-      pos[3]=3;

-   } else if (channels==3||channels==5||channels==6)

-   {

-      pos[0]=1;

-      pos[1]=2;

-      pos[2]=3;

-      pos[3]=1;

-      pos[4]=3;

-      pos[5]=0;

-   } else if (channels==7)

-   {

-      pos[0]=1;

-      pos[1]=2;

-      pos[2]=3;

-      pos[3]=1;

-      pos[4]=3;

-      pos[5]=2;

-      pos[6]=0;

-   } else if (channels==8)

-   {

-      pos[0]=1;

-      pos[1]=2;

-      pos[2]=3;

-      pos[3]=1;

-      pos[4]=3;

-      pos[5]=1;

-      pos[6]=3;

-      pos[7]=0;

-   }

-}

 #if !defined(DISABLE_FLOAT_API)

 static void opus_copy_channel_in_float(

   opus_val16 *dst,

@@ -690,58 +878,7 @@

       dst[i*dst_stride] = float_src[i*src_stride+src_channel];

 #endif

-static void opus_surround_downmix_float(

-  opus_val16 *dst,

-  const void *src,

-  int channels,

-  int frame_size

-)

-{

-   const float *float_src;

-   opus_int32 i;

-   int pos[8] = {0};

-   int c;

-   float_src = (const float *)src;

-   channel_pos(channels, pos);

-   for (i=0;i<2*frame_size;i++)

-      dst[i]=0;

-   for (c=0;c<channels;c++)

-   {

-      if (pos[c]==1)

-      {

-         for (i=0;i<frame_size;i++)

-#if defined(FIXED_POINT)

-            dst[2*i] += SHR16(FLOAT2INT16(float_src[i*channels+c]),3);

-#else

-            dst[2*i] += float_src[i*channels+c];

 #endif

-      } else if (pos[c]==3)

-      {

-         for (i=0;i<frame_size;i++)

-#if defined(FIXED_POINT)

-            dst[2*i+1] += SHR16(FLOAT2INT16(float_src[i*channels+c]),3);

-#else

-            dst[2*i+1] += float_src[i*channels+c];

-#endif

-      } else if (pos[c]==2)

-      {

-         for (i=0;i<frame_size;i++)

-         {

-#if defined(FIXED_POINT)

-            dst[2*i] += SHR32(MULT16_16(QCONST16(.70711f,15), FLOAT2INT16(float_src[i*channels+c])),3+15);

-            dst[2*i+1] += SHR32(MULT16_16(QCONST16(.70711f,15), FLOAT2INT16(float_src[i*channels+c])),3+15);

-#else

-            dst[2*i] += .707f*float_src[i*channels+c];

-            dst[2*i+1] += .707f*float_src[i*channels+c];

-#endif

-         }

-      }

-   }

-}

-#endif

 static void opus_copy_channel_in_short(

   opus_val16 *dst,

@@ -763,58 +900,7 @@

 #endif

-static void opus_surround_downmix_short(

-  opus_val16 *dst,

-  const void *src,

-  int channels,

-  int frame_size

-)

-{

-   const opus_int16 *short_src;

-   opus_int32 i;

-   int pos[8] = {0};

-   int c;

-   short_src = (const opus_int16 *)src;

-   channel_pos(channels, pos);

-   for (i=0;i<2*frame_size;i++)

-      dst[i]=0;

-   for (c=0;c<channels;c++)

-   {

-      if (pos[c]==1)

-      {

-         for (i=0;i<frame_size;i++)

-#if defined(FIXED_POINT)

-            dst[2*i] += SHR16(short_src[i*channels+c],3);

-#else

-            dst[2*i] += (1/32768.f)*short_src[i*channels+c];

-#endif

-      } else if (pos[c]==3)

-      {

-         for (i=0;i<frame_size;i++)

-#if defined(FIXED_POINT)

-            dst[2*i+1] += SHR16(short_src[i*channels+c],3);

-#else

-            dst[2*i+1] += (1/32768.f)*short_src[i*channels+c];

-#endif

-      } else if (pos[c]==2)

-      {

-         for (i=0;i<frame_size;i++)

-         {

-#if defined(FIXED_POINT)

-            dst[2*i] += SHR32(MULT16_16(QCONST16(.70711f,15), short_src[i*channels+c]),3+15);

-            dst[2*i+1] += SHR32(MULT16_16(QCONST16(.70711f,15), short_src[i*channels+c]),3+15);

-#else

-            dst[2*i] += (.707f/32768.f)*short_src[i*channels+c];

-            dst[2*i+1] += (.707f/32768.f)*short_src[i*channels+c];

-#endif

-         }

-      }

-   }

-}

 #ifdef FIXED_POINT

 int opus_multistream_encode(

     OpusMSEncoder *st,

@@ -825,7 +911,7 @@

    return opus_multistream_encode_native(st, opus_copy_channel_in_short,

-      pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_short);

+      pcm, frame_size, data, max_data_bytes, 16, downmix_int);

 #ifndef DISABLE_FLOAT_API

@@ -838,7 +924,7 @@

    return opus_multistream_encode_native(st, opus_copy_channel_in_float,

-      pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_float);

+      pcm, frame_size, data, max_data_bytes, 16, downmix_float);

 #endif

@@ -853,9 +939,8 @@

     opus_int32 max_data_bytes

-   int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;

    return opus_multistream_encode_native(st, opus_copy_channel_in_float,

-      pcm, frame_size, data, max_data_bytes, 24, opus_surround_downmix_float, downmix_float, pcm+channels*st->analysis.analysis_offset);

+      pcm, frame_size, data, max_data_bytes, 24, downmix_float);

 int opus_multistream_encode(

@@ -866,9 +951,8 @@

     opus_int32 max_data_bytes

-   int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;

    return opus_multistream_encode_native(st, opus_copy_channel_in_short,

-      pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_short, downmix_int, pcm+channels*st->analysis.analysis_offset);

+      pcm, frame_size, data, max_data_bytes, 16, downmix_int);

 #endif

--- a/src/opus_private.h

+++ b/src/opus_private.h

@@ -82,9 +82,9 @@

 #define OPUS_SET_FORCE_MODE_REQUEST    11002

 #define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x)

-typedef void (*downmix_func)(const void *, float *, int, int, int);

-void downmix_float(const void *_x, float *sub, int subframe, int offset, int C);

-void downmix_int(const void *_x, float *sub, int subframe, int offset, int C);

+typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int);

+void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);

+void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);

 int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,

                 int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,

@@ -94,12 +94,13 @@

 opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs);

+opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,

+      int variable_duration, int C, opus_int32 Fs, int bitrate_bps,

+      int delay_compensation, downmix_func downmix, opus_val32 *subframe_mem);

 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,

-      unsigned char *data, opus_int32 out_data_bytes, int lsb_depth

-#ifndef FIXED_POINT

-                , AnalysisInfo *analysis_info

-#endif

-      );

+      unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,

+      const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix);

 int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,

       opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,

--- a/src/tansig_table.h

+++ b/src/tansig_table.h

@@ -1,6 +1,6 @@

 /* This file is auto-generated by gen_tables */

-static const opus_val16 tansig_table[201] = {

+static const float tansig_table[201] = {

 0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,

 0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,

 0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,