shithub: opus

--- a/celt/celt_encoder.c

+++ b/celt/celt_encoder.c

@@ -1329,7 +1329,7 @@

       prefilter_tapset = st->tapset_decision;

       pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);

-      if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && st->analysis.tonality > .3

+      if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)

             && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))

          pitch_change = 1;

       if (pf_on==0)

@@ -1353,15 +1353,17 @@

    isTransient = 0;

    shortBlocks = 0;

+   if (st->complexity >= 1)

+   {

+      isTransient = transient_analysis(in, N+st->overlap, CC,

+            &tf_estimate, &tf_chan);

+   }

    if (LM>0 && ec_tell(enc)+3<=total_bits)

-      if (st->complexity >= 1)

-      {

-         isTransient = transient_analysis(in, N+st->overlap, CC,

-                  &tf_estimate, &tf_chan);

-         if (isTransient)

-            shortBlocks = M;

-      }

+      if (isTransient)

+         shortBlocks = M;

+   } else {

+      isTransient = 0;

    ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */

--- a/include/opus_defines.h

+++ b/include/opus_defines.h

@@ -148,8 +148,9 @@

 #define OPUS_GET_GAIN_REQUEST                4045 /* Should have been 4035 */

 #define OPUS_SET_LSB_DEPTH_REQUEST           4036

 #define OPUS_GET_LSB_DEPTH_REQUEST           4037

 #define OPUS_GET_LAST_PACKET_DURATION_REQUEST 4039

+#define OPUS_SET_EXPERT_FRAME_DURATION_REQUEST 4040

+#define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041

 /* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */

@@ -185,6 +186,15 @@

 #define OPUS_BANDWIDTH_SUPERWIDEBAND         1104 /**<12 kHz bandpass @hideinitializer*/

 #define OPUS_BANDWIDTH_FULLBAND              1105 /**<20 kHz bandpass @hideinitializer*/

+#define OPUS_FRAMESIZE_ARG                   5000 /**< Select frame size from the argument (default) */

+#define OPUS_FRAMESIZE_2_5_MS                5001 /**< Use 2.5 ms frames */

+#define OPUS_FRAMESIZE_5_MS                  5002 /**< Use 5 ms frames */

+#define OPUS_FRAMESIZE_10_MS                 5003 /**< Use 10 ms frames */

+#define OPUS_FRAMESIZE_20_MS                 5004 /**< Use 20 ms frames */

+#define OPUS_FRAMESIZE_40_MS                 5005 /**< Use 40 ms frames */

+#define OPUS_FRAMESIZE_60_MS                 5006 /**< Use 60 ms frames */

+#define OPUS_FRAMESIZE_VARIABLE              5010 /**< Optimize the frame size dynamically */

 /**@}*/

@@ -525,6 +535,32 @@

   * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).

   * @hideinitializer */

 #define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)

+/** Configures the encoder's use of variable duration frames.

+  * When enabled, the encoder is free to use a shorter frame size than the one

+  * requested in the opus_encode*() call. It is then the user's responsibility

+  * to verify how much audio was encoded by checking the ToC byte of the encoded

+  * packet. The part of the audio that was not encoded needs to be resent to the

+  * encoder for the next call. Do not use this option unless you <b>really</b>

+  * know what you are doing.

+  * @see OPUS_GET_EXPERT_FRAME_DURATION

+  * @param[in] x <tt>opus_int32</tt>: Allowed values:

+  * <dl>

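+  * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Disable variable duration; select frame size from the argument (default).</dd>

+  * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Enable variable duration; optimize the frame size dynamically.</dd>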

+  * </dl>

+  * @hideinitializer */

+#define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x)

+/** Gets the encoder's configured use of variable duration frames.

+  * @see OPUS_SET_EXPERT_FRAME_DURATION

+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:

+  * <dl>

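+  * <dt>OPUS_FRAMESIZE_ARG</dt><dd>variable duration disabled; frame size is selected from the argument (default).</dd>

+  * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>variable duration enabled; frame size is optimized dynamically.</dd>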

+  * </dl>

+  * @hideinitializer */

+#define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x)

 /**@}*/

 /** @defgroup opus_genericctls Generic CTLs

--- a/src/analysis.c

+++ b/src/analysis.c

@@ -139,10 +139,56 @@

-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth)

+void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)

+   int pos;

+   int curr_lookahead;

+   float psum;

+   int i;

+   pos = tonal->read_pos;

+   curr_lookahead = tonal->write_pos-tonal->read_pos;

+   if (curr_lookahead<0)

+      curr_lookahead += DETECT_SIZE;

+   if (len > 480 && pos != tonal->write_pos)

+   {

+      pos++;

+      if (pos==DETECT_SIZE)

+         pos=0;

+   }

+   if (pos == tonal->write_pos)

+      pos--;

+   if (pos<0)

+      pos = DETECT_SIZE-1;

+   OPUS_COPY(info_out, &tonal->info[pos], 1);

+   tonal->read_subframe += len/120;

+   while (tonal->read_subframe>=4)

+   {

+      tonal->read_subframe -= 4;

+      tonal->read_pos++;

+   }

+   if (tonal->read_pos>=DETECT_SIZE)

+      tonal->read_pos-=DETECT_SIZE;

+   /* Compensate for the delay in the features themselves.

+      FIXME: Need a better estimate than the 10 I just made up */

+   curr_lookahead = IMAX(curr_lookahead-10, 0);

+   psum=0;

+   for (i=0;i<DETECT_SIZE-curr_lookahead;i++)

+      psum += tonal->pmusic[i];

+   for (;i<DETECT_SIZE;i++)

+      psum += tonal->pspeech[i];

+   psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;

+   /*printf("%f %f\n", psum, info_out->music_prob);*/

+   info_out->music_prob = psum;

+}

+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix)

+{

     int i, b;

-    const CELTMode *mode;

     const kiss_fft_state *kfft;

     kiss_fft_cpx in[480], out[480];

     int N = 480, N2=240;

@@ -163,7 +209,7 @@

     float slope=0;

     float frame_stationarity;

     float relativeE;

-    float frame_prob;

+    float frame_probs[2];

     float alpha, alphaE, alphaE2;

     float frame_loudness;

     float bandwidth_mask;

@@ -170,7 +216,8 @@

     int bandwidth=0;

     float maxE = 0;

     float noise_floor;

-    celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode));

+    int remaining;

+    AnalysisInfo *info;

     tonal->last_transition++;

     alpha = 1.f/IMIN(20, 1+tonal->count);

@@ -179,27 +226,32 @@

     if (tonal->count<4)

        tonal->music_prob = .5;

-    kfft = mode->mdct.kfft[0];

-    if (C==1)

+    kfft = celt_mode->mdct.kfft[0];

+    if (tonal->count==0)

+       tonal->mem_fill = 240;

+    downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C);

+    if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)

-       for (i=0;i<N2;i++)

-       {

-          float w = analysis_window[i];

-          in[i].r = MULT16_16(w, x[i]);

-          in[i].i = MULT16_16(w, x[N-N2+i]);

-          in[N-i-1].r = MULT16_16(w, x[N-i-1]);

-          in[N-i-1].i = MULT16_16(w, x[2*N-N2-i-1]);

-       }

-    } else {

-       for (i=0;i<N2;i++)

-       {

-          float w = analysis_window[i];

-          in[i].r = MULT16_16(w, x[2*i]+x[2*i+1]);

-          in[i].i = MULT16_16(w, x[2*(N-N2+i)]+x[2*(N-N2+i)+1]);

-          in[N-i-1].r = MULT16_16(w, x[2*(N-i-1)]+x[2*(N-i-1)+1]);

-          in[N-i-1].i = MULT16_16(w, x[2*(2*N-N2-i-1)]+x[2*(2*N-N2-i-1)+1]);

-       }

+       tonal->mem_fill += len;

+       /* Don't have enough to update the analysis */

+       return;

+    info = &tonal->info[tonal->write_pos++];

+    if (tonal->write_pos>=DETECT_SIZE)

+       tonal->write_pos-=DETECT_SIZE;

+    for (i=0;i<N2;i++)

+    {

+       float w = analysis_window[i];

+       in[i].r = MULT16_16(w, tonal->inmem[i]);

+       in[i].i = MULT16_16(w, tonal->inmem[N2+i]);

+       in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);

+       in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);

+    }

+    OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);

+    remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);

+    downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C);

+    tonal->mem_fill = 240 + remaining;

     opus_fft(kfft, in, out);

     for (i=1;i<N2;i++)

@@ -417,27 +469,91 @@

     features[24] = tonal->lowECount;

 #ifndef FIXED_POINT

-    mlp_process(&net, features, &frame_prob);

-    frame_prob = .5f*(frame_prob+1);

+    mlp_process(&net, features, frame_probs);

+    frame_probs[0] = .5f*(frame_probs[0]+1);

     /* Curve fitting between the MLP probability and the actual probability */

-    frame_prob = .01f + 1.21f*frame_prob*frame_prob - .23f*(float)pow(frame_prob, 10);

+    frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);

+    frame_probs[1] = .5*frame_probs[1]+.5;

+    frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5;

-    /*printf("%f\n", frame_prob);*/

+    /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/

        float tau, beta;

        float p0, p1;

-       float max_certainty;

        /* One transition every 3 minutes */

-       tau = .00005f;

-       beta = .1f;

-       max_certainty = .01f+1.f/(20.f+.5f*tonal->last_transition);

+       tau = .00005f*frame_probs[1];

+       beta = .05f;

+       if (1) {

+          /* Adapt beta based on how "unexpected" the new prob is */

+          float p, q;

+          p = MAX16(.05f,MIN16(.95f,frame_probs[0]));

+          q = MAX16(.05f,MIN16(.95f,tonal->music_prob));

+          beta = .01+.05*ABS16(p-q)/(p*(1-q)+q*(1-p));

+       }

        p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;

        p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;

-       p0 *= (float)pow(1-frame_prob, beta);

-       p1 *= (float)pow(frame_prob, beta);

-       tonal->music_prob = MAX16(max_certainty, MIN16(1-max_certainty, p1/(p0+p1)));

+       p0 *= (float)pow(1-frame_probs[0], beta);

+       p1 *= (float)pow(frame_probs[0], beta);

+       tonal->music_prob = p1/(p0+p1);

        info->music_prob = tonal->music_prob;

-       /*printf("%f %f\n", frame_prob, info->music_prob);*/

+       float psum=1e-20;

+       float speech0 = (float)pow(1-frame_probs[0], beta);

+       float music0  = (float)pow(frame_probs[0], beta);

+       if (tonal->count==1)

+       {

+          tonal->pspeech[0]=.5;

+          tonal->pmusic [0]=.5;

+       }

+       float s0, m0;

+       s0 = tonal->pspeech[0] + tonal->pspeech[1];

+       m0 = tonal->pmusic [0] + tonal->pmusic [1];

+       tonal->pspeech[0] = s0*(1-tau)*speech0;

+       tonal->pmusic [0] = m0*(1-tau)*music0;

+       for (i=1;i<DETECT_SIZE-1;i++)

+       {

+          tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;

+          tonal->pmusic [i] = tonal->pmusic [i+1]*music0;

+       }

+       tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0;

+       tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0;

+       for (i=0;i<DETECT_SIZE;i++)

+          psum += tonal->pspeech[i] + tonal->pmusic[i];

+       psum = 1.f/psum;

+       for (i=0;i<DETECT_SIZE;i++)

+       {

+          tonal->pspeech[i] *= psum;

+          tonal->pmusic [i] *= psum;

+       }

+       psum = tonal->pmusic[0];

+       for (i=1;i<DETECT_SIZE;i++)

+          psum += tonal->pspeech[i];

+       /* Estimate our confidence in the speech/music decisions */

+       if (frame_probs[1]>.75)

+       {

+          if (tonal->music_prob>.9)

+          {

+             float adapt;

+             adapt = 1.f/(++tonal->music_confidence_count);

+             tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);

+             tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);

+          }

+          if (tonal->music_prob<.1)

+          {

+             float adapt;

+             adapt = 1.f/(++tonal->speech_confidence_count);

+             tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);

+             tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);

+          }

+       } else {

+          if (tonal->music_confidence_count==0)

+             tonal->music_confidence = .9;

+          if (tonal->speech_confidence_count==0)

+             tonal->speech_confidence = .1;

+       }

+       psum = MAX16(tonal->speech_confidence, MIN16(tonal->music_confidence, psum));

     if (tonal->last_music != (tonal->music_prob>.5f))

        tonal->last_transition=0;

@@ -465,4 +581,48 @@

     /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/

     info->noisiness = frame_noisiness;

     info->valid = 1;

+    if (info_out!=NULL)

+       OPUS_COPY(info_out, info, 1);

+}

+int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,

+                        const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,

+                        int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)

+{

+   int offset;

+   int pcm_len;

+   /* Avoid overflow/wrap-around of the analysis buffer */

+   frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size);

+   pcm_len = frame_size - analysis->analysis_offset;

+   offset = 0;

+   do {

+      tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix);

+      offset += 480;

+      pcm_len -= 480;

+   } while (pcm_len>0);

+   analysis->analysis_offset = frame_size;

+   if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)

+   {

+      int LM = 3;

+      LM = optimize_framesize(pcm, frame_size, C, Fs, bitrate_bps,

+            analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix);

+      while ((Fs/400<<LM)>frame_size)

+         LM--;

+      frame_size = (Fs/400<<LM);

+   } else {

+      frame_size = frame_size_select(frame_size, variable_duration, Fs);

+   }

+   if (frame_size<0)

+      return -1;

+   analysis->analysis_offset -= frame_size;

+   /* Only perform analysis up to 20-ms frames. Longer ones will be split if

+      they're in CELT-only mode. */

+   analysis_info->valid = 0;

+   tonality_get_info(analysis, analysis_info, frame_size);

+   return frame_size;

--- a/src/analysis.h

+++ b/src/analysis.h

@@ -28,18 +28,27 @@

 #ifndef ANALYSIS_H

 #define ANALYSIS_H

+#include "celt.h"

+#include "opus_private.h"

 #define NB_FRAMES 8

 #define NB_TBANDS 18

 #define NB_TOT_BANDS 21

+#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */

+#define DETECT_SIZE 200

 typedef struct {

    float angle[240];

    float d_angle[240];

    float d2_angle[240];

+   float inmem[ANALYSIS_BUF_SIZE];

+   int   mem_fill;                      /* number of usable samples in the buffer */

    float prev_band_tonality[NB_TBANDS];

    float prev_tonality;

    float E[NB_FRAMES][NB_TBANDS];

-   float lowE[NB_TBANDS], highE[NB_TBANDS];

+   float lowE[NB_TBANDS];

+   float highE[NB_TBANDS];

    float meanE[NB_TOT_BANDS];

    float mem[32];

    float cmean[8];

@@ -52,9 +61,27 @@

    int last_transition;

    int count;

    int opus_bandwidth;

+   opus_val32   subframe_mem[3];

+   int analysis_offset;

+   float pspeech[DETECT_SIZE];

+   float pmusic[DETECT_SIZE];

+   float speech_confidence;

+   float music_confidence;

+   int speech_confidence_count;

+   int music_confidence_count;

+   int write_pos;

+   int read_pos;

+   int read_subframe;

+   AnalysisInfo info[DETECT_SIZE];

 } TonalityAnalysisState;

 void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,

-     CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth);

+     const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix);

+void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len);

+int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,

+                        const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,

+                        int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);

 #endif

--- a/src/mlp_data.c

+++ b/src/mlp_data.c

@@ -3,74 +3,103 @@

 #include "mlp.h"

-/* RMS error was 0.179835, seed was 1322103961 */

+/* RMS error was 0.138320, seed was 1361535663 */

-static const float weights[271] = {

+static const float weights[422] = {

 /* hidden layer */

-1.55597f, -0.0739792f, -0.0646761f, -0.099531f, -0.0794943f,

-0.0180174f, -0.0391354f, 0.0508224f, -0.0160169f, -0.0773263f,

--0.0300002f, -0.0865361f, 0.124477f, -0.28648f, -0.0860702f,

--0.518949f, -0.0873341f, -0.235393f, -0.907833f, -0.383573f,

-0.535388f, -0.57944f, 0.98116f, 0.8482f, 1.12426f,

--3.23721f, -0.647072f, -0.0265139f, 0.0711052f, -0.00125666f,

--0.0396181f, -0.44282f, -0.510495f, -0.201865f, 0.0134336f,

--0.167205f, -0.155406f, 0.00041678f, -0.00468705f, -0.0233224f,

-0.264279f, -0.301375f, 0.00234895f, 0.0144741f, -0.137535f,

-0.200323f, 0.0192027f, 3.19818f, 2.03495f, 0.705517f,

--4.6025f, -0.11485f, -0.792716f, 0.150714f, 0.10608f,

-0.240633f, 0.0690698f, 0.0695297f, 0.124819f, 0.0501433f,

-0.0460952f, 0.147639f, 0.10327f, 0.158007f, 0.113714f,

-0.0276191f, 0.0680749f, -0.130012f, 0.0796126f, 0.133067f,

-0.51495f, 0.747578f, -0.128742f, 5.98112f, -1.16698f,

--0.276492f, -1.73549f, -3.90234f, 2.01489f, -0.040118f,

--0.113002f, -0.146751f, -0.113569f, 0.0534873f, 0.0989832f,

-0.0872875f, 0.049266f, 0.0367557f, -0.00889148f, -0.0648461f,

--0.00190352f, 0.0143773f, 0.0259364f, -0.0592133f, -0.0672924f,

-0.1399f, -0.0987886f, -0.347402f, 0.101326f, -0.0680876f,

-0.469186f, 0.246922f, 10.4017f, 3.44846f, -0.662725f,

--0.0328208f, -0.0561274f, -0.0167744f, 0.00044282f, -0.0457645f,

--0.0408314f, -0.013113f, -0.0373873f, -0.0474122f, -0.0273745f,

--0.0308505f, 0.000582959f, -0.0421135f, 0.464859f, 0.196842f,

-0.320538f, 0.0435528f, -0.200168f, 0.266475f, -0.0853727f,

-1.20397f, 0.711542f, -1.04397f, -1.47759f, 1.26768f,

-0.446958f, 0.266477f, -0.30802f, 0.28431f, -0.118541f,

-0.00836345f, 0.0689026f, -0.0137996f, -0.0395417f, 0.26982f,

--0.206255f, 0.16066f, 0.114757f, 0.359587f, -0.106503f,

--0.0948534f, 0.175358f, -0.122966f, -0.0056675f, 0.483848f,

--0.134916f, -0.427567f, -0.140172f, -1.0866f, -2.73921f,

-0.549843f, 0.17685f, 0.0010675f, -0.00137386f, 0.0884424f,

--0.0698736f, -0.00174136f, 0.0718775f, -0.0396849f, 0.0448056f,

-0.0577853f, -0.0372353f, 0.134599f, 0.0260656f, 0.140322f,

-0.22704f, -0.020568f, -0.0142424f, -0.21723f, -0.997704f,

--0.884573f, -0.163495f, 2.33617f, 0.224142f, 0.19635f,

--0.957387f, 0.144678f, 1.47035f, -0.00700498f, -0.0472309f,

--0.0137848f, -0.0189145f, 0.00856479f, 0.0316965f, 0.00613373f,

-0.00209807f, 0.00270964f, -0.0490206f, 0.0105712f, -0.0465045f,

--0.0381532f, -0.0985268f, -0.108297f, 0.0146409f, -0.0040718f,

--0.0698572f, -0.380568f, -0.230479f, 3.98917f, 0.457652f,

--1.02355f, -7.4435f, -0.475314f, 1.61743f, 0.0254017f,

--0.00791293f, 0.047217f, 0.0220995f, -0.0304311f, 0.0052168f,

--0.0404054f, -0.0230293f, 0.00169229f, -0.0138178f, 0.0043137f,

--0.0598088f, -0.133601f, 0.0555138f, -0.177358f, -0.159856f,

--0.137281f, 0.108051f, -0.305973f, 0.393775f, 0.0747287f,

-0.783993f, -0.875086f, 1.06862f, 0.340519f, -0.352681f,

--0.0830912f, -0.100017f, 0.0729085f, -0.00829403f, 0.027489f,

--0.0779597f, 0.082286f, -0.164181f, -0.41519f, 0.00282335f,

--0.29573f, 0.125571f, 0.726935f, 0.392137f, 0.491348f,

-0.0723196f, -0.0259758f, -0.0636332f, -0.452384f, -0.000225974f,

--2.34001f, 2.45211f, -0.544628f, 5.62944f, -3.44507f,

+-0.0941125f, -0.302976f, -0.603555f, -0.19393f, -0.185983f,

+-0.601617f, -0.0465317f, -0.114563f, -0.103599f, -0.618938f,

+-0.317859f, -0.169949f, -0.0702885f, 0.148065f, 0.409524f,

+0.548432f, 0.367649f, -0.494393f, 0.764306f, -1.83957f,

+0.170849f, 12.786f, -1.08848f, -1.27284f, -16.2606f,

+24.1773f, -5.57454f, -0.17276f, -0.163388f, -0.224421f,

+-0.0948944f, -0.0728695f, -0.26557f, -0.100283f, -0.0515459f,

+-0.146142f, -0.120674f, -0.180655f, 0.12857f, 0.442138f,

+-0.493735f, 0.167767f, 0.206699f, -0.197567f, 0.417999f,

+1.50364f, -0.773341f, -10.0401f, 0.401872f, 2.97966f,

+15.2165f, -1.88905f, -1.19254f, 0.0285397f, -0.00405139f,

+0.0707565f, 0.00825699f, -0.0927269f, -0.010393f, -0.00428882f,

+-0.00489743f, -0.0709731f, -0.00255992f, 0.0395619f, 0.226424f,

+0.0325231f, 0.162175f, -0.100118f, 0.485789f, 0.12697f,

+0.285937f, 0.0155637f, 0.10546f, 3.05558f, 1.15059f,

+-1.00904f, -1.83088f, 3.31766f, -3.42516f, -0.119135f,

+-0.0405654f, 0.00690068f, 0.0179877f, -0.0382487f, 0.00597941f,

+-0.0183611f, 0.00190395f, -0.144322f, -0.0435671f, 0.000990594f,

+0.221087f, 0.142405f, 0.484066f, 0.404395f, 0.511955f,

+-0.237255f, 0.241742f, 0.35045f, -0.699428f, 10.3993f,

+2.6507f, -2.43459f, -4.18838f, 1.05928f, 1.71067f,

+0.00667811f, -0.0721335f, -0.0397346f, 0.0362704f, -0.11496f,

+-0.0235776f, 0.0082161f, -0.0141741f, -0.0329699f, -0.0354253f,

+0.00277404f, -0.290654f, -1.14767f, -0.319157f, -0.686544f,

+0.36897f, 0.478899f, 0.182579f, -0.411069f, 0.881104f,

+-4.60683f, 1.4697f, 0.335845f, -1.81905f, -30.1699f,

+5.55225f, 0.0019508f, -0.123576f, -0.0727332f, -0.0641597f,

+-0.0534458f, -0.108166f, -0.0937368f, -0.0697883f, -0.0275475f,

+-0.192309f, -0.110074f, 0.285375f, -0.405597f, 0.0926724f,

+-0.287881f, -0.851193f, -0.099493f, -0.233764f, -1.2852f,

+1.13611f, 3.12168f, -0.0699f, -1.86216f, 2.65292f,

+-7.31036f, 2.44776f, -0.00111802f, -0.0632786f, -0.0376296f,

+-0.149851f, 0.142963f, 0.184368f, 0.123433f, 0.0756158f,

+0.117312f, 0.0933395f, 0.0692163f, 0.0842592f, 0.0704683f,

+0.0589963f, 0.0942205f, -0.448862f, 0.0262677f, 0.270352f,

+-0.262317f, 0.172586f, 2.00227f, -0.159216f, 0.038422f,

+10.2073f, 4.15536f, -2.3407f, -0.0550265f, 0.00964792f,

+-0.141336f, 0.0274501f, 0.0343921f, -0.0487428f, 0.0950172f,

+-0.00775017f, -0.0372492f, -0.00548121f, -0.0663695f, 0.0960506f,

+-0.200008f, -0.0412827f, 0.58728f, 0.0515787f, 0.337254f,

+0.855024f, 0.668371f, -0.114904f, -3.62962f, -0.467477f,

+-0.215472f, 2.61537f, 0.406117f, -1.36373f, 0.0425394f,

+0.12208f, 0.0934502f, 0.123055f, 0.0340935f, -0.142466f,

+0.035037f, -0.0490666f, 0.0733208f, 0.0576672f, 0.123984f,

+-0.0517194f, -0.253018f, 0.590565f, 0.145849f, 0.315185f,

+0.221534f, -0.149081f, 0.216161f, -0.349575f, 24.5664f,

+-0.994196f, 0.614289f, -18.7905f, -2.83277f, -0.716801f,

+-0.347201f, 0.479515f, -0.246027f, 0.0758683f, 0.137293f,

+-0.17781f, 0.118751f, -0.00108329f, -0.237334f, 0.355732f,

+-0.12991f, -0.0547627f, -0.318576f, -0.325524f, 0.180494f,

+-0.0625604f, 0.141219f, 0.344064f, 0.37658f, -0.591772f,

+5.8427f, -0.38075f, 0.221894f, -1.41934f, -1.87943e+06f,

+1.34114f, 0.0283355f, -0.0447856f, -0.0211466f, -0.0256927f,

+0.0139618f, 0.0207934f, -0.0107666f, 0.0110969f, 0.0586069f,

+-0.0253545f, -0.0328433f, 0.11872f, -0.216943f, 0.145748f,

+0.119808f, -0.0915211f, -0.120647f, -0.0787719f, -0.143644f,

+-0.595116f, -1.152f, -1.25335f, -1.17092f, 4.34023f,

+-975268.f, -1.37033f, -0.0401123f, 0.210602f, -0.136656f,

+0.135962f, -0.0523293f, 0.0444604f, 0.0143928f, 0.00412666f,

+-0.0193003f, 0.218452f, -0.110204f, -2.02563f, 0.918238f,

+-2.45362f, 1.19542f, -0.061362f, -1.92243f, 0.308111f,

+0.49764f, 0.912356f, 0.209272f, -2.34525f, 2.19326f,

+-6.47121f, 1.69771f, -0.725123f, 0.0118929f, 0.0377944f,

+0.0554003f, 0.0226452f, -0.0704421f, -0.0300309f, 0.0122978f,

+-0.0041782f, -0.0686612f, 0.0313115f, 0.039111f, 0.364111f,

+-0.0945548f, 0.0229876f, -0.17414f, 0.329795f, 0.114714f,

+0.30022f, 0.106997f, 0.132355f, 5.79932f, 0.908058f,

+-0.905324f, -3.3561f, 0.190647f, 0.184211f, -0.673648f,

+0.231807f, -0.0586222f, 0.230752f, -0.438277f, 0.245857f,

+-0.17215f, 0.0876383f, -0.720512f, 0.162515f, 0.0170571f,

+0.101781f, 0.388477f, 1.32931f, 1.08548f, -0.936301f,

+-2.36958f, -6.71988f, -3.44376f, 2.13818f, 14.2318f,

+4.91459f, -3.09052f, -9.69191f, -0.768234f, 1.79604f,

+0.0549653f, 0.163399f, 0.0797025f, 0.0343933f, -0.0555876f,

+-0.00505673f, 0.0187258f, 0.0326628f, 0.0231486f, 0.15573f,

+0.0476223f, -0.254824f, 1.60155f, -0.801221f, 2.55496f,

+0.737629f, -1.36249f, -0.695463f, -2.44301f, -1.73188f,

+3.95279f, 1.89068f, 0.486087f, -11.3343f, 3.9416e+06f,

 /* output layer */

--3.13835f, 0.994751f, 0.444901f, 1.59518f, 1.23665f,

-3.37012f, -1.34606f, 1.99131f, 1.33476f, 1.3885f,

-1.12559f, };

+-0.381439, 0.12115, -0.906927, 2.93878, 1.6388,

+0.882811, 0.874344, 1.21726, -0.874545, 0.321706,

+0.785055, 0.946558, -0.575066, -3.46553, 0.884905,

+0.0924047, -9.90712, 0.391338, 0.160103, -2.04954,

+4.1455, 0.0684029, -0.144761, -0.285282, 0.379244,

+-1.1584, -0.0277241, -9.85, -4.82386, 3.71333,

+3.87308, 3.52558, };

-static const int topo[3] = {25, 10, 1};

+static const int topo[3] = {25, 15, 2};

 const MLP net = {

-	3,

-	topo,

-	weights

+    3,

+    topo,

+    weights

};

--- a/src/mlp_train.c

+++ b/src/mlp_train.c

@@ -106,6 +106,7 @@

 #define MAX_NEURONS 100

+#define MAX_OUT 10

 double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamples, double *W0_grad, double *W1_grad, double *error_rate)

@@ -120,7 +121,8 @@

 	double netOut[MAX_NEURONS];

 	double error[MAX_NEURONS];

-        *error_rate = 0;

+	for (i=0;i<outDim;i++)

+	   error_rate[i] = 0;

 	topo = net->topo;

 	inDim = net->topo[0];

 	hiddenDim = net->topo[1];

@@ -153,7 +155,7 @@

 			netOut[i] = tansig_approx(sum);

 			error[i] = out[i] - netOut[i];

 			rms += error[i]*error[i];

-			*error_rate += fabs(error[i])>1;

+			error_rate[i] += fabs(error[i])>1;

 			/*error[i] = error[i]/(1+fabs(error[i]));*/

 		/* Back-propagate error */

@@ -194,7 +196,7 @@

 	double *W0_grad;

 	double *W1_grad;

 	double rms;

-	double error_rate;

+	double error_rate[MAX_OUT];

};

 void *gradient_thread_process(void *_arg)

@@ -213,7 +215,7 @@

 		sem_wait(&sem_begin[arg->id]);

 		if (arg->done)

 			break;

-		arg->rms = compute_gradient(arg->net, arg->inputs, arg->outputs, arg->nbSamples, arg->W0_grad, arg->W1_grad, &arg->error_rate);

+		arg->rms = compute_gradient(arg->net, arg->inputs, arg->outputs, arg->nbSamples, arg->W0_grad, arg->W1_grad, arg->error_rate);

 		sem_post(&sem_end[arg->id]);

 	fprintf(stderr, "done\n");

@@ -295,7 +297,7 @@

 	for (e=0;e<nbEpoch;e++)

 		double rms=0;

-                double error_rate = 0;

+		double error_rate[2] = {0,0};

 		for (i=0;i<NB_THREADS;i++)

 			sem_post(&sem_begin[i]);

@@ -306,7 +308,8 @@

 			sem_wait(&sem_end[i]);

 			rms += args[i].rms;

-			error_rate += args[i].error_rate;

+			error_rate[0] += args[i].error_rate[0];

+            error_rate[1] += args[i].error_rate[1];

 			for (j=0;j<W0_size;j++)

 				W0_grad[j] += args[i].W0_grad[j];

 			for (j=0;j<W1_size;j++)

@@ -315,8 +318,9 @@

 		float mean_rate = 0, min_rate = 1e10;

 		rms = (rms/(outDim*nbSamples));

-		error_rate = (error_rate/(outDim*nbSamples));

-		fprintf (stderr, "%f (%f %f) ", error_rate, rms, best_rms);

+		error_rate[0] = (error_rate[0]/(nbSamples));

+        error_rate[1] = (error_rate[1]/(nbSamples));

+		fprintf (stderr, "%f %f (%f %f) ", error_rate[0], error_rate[1], rms, best_rms);

 		if (rms < best_rms)

 			best_rms = rms;

@@ -445,6 +449,7 @@

 	outputs = malloc(nbOutputs*nbSamples*sizeof(*outputs));

 	seed = time(NULL);

+    /*seed = 1361480659;*/

 	fprintf (stderr, "Seed is %u\n", seed);

 	srand(seed);

 	build_tansig_table();

--- a/src/opus_demo.c

+++ b/src/opus_demo.c

@@ -53,6 +53,7 @@

     fprintf(stderr, "-d                   : only runs the decoder (reads the bit-stream as input)\n" );

     fprintf(stderr, "-cbr                 : enable constant bitrate; default: variable bitrate\n" );

     fprintf(stderr, "-cvbr                : enable constrained variable bitrate; default: unconstrained\n" );

+    fprintf(stderr, "-variable-duration   : enable frames of variable duration (experts only); default: disabled\n" );

     fprintf(stderr, "-bandwidth <NB|MB|WB|SWB|FB> : audio bandwidth (from narrowband to fullband); default: sampling rate\n" );

     fprintf(stderr, "-framesize <2.5|5|10|20|40|60> : frame size in ms; default: 20 \n" );

     fprintf(stderr, "-max_payload <bytes> : maximum payload size in bytes, default: 1024\n" );

@@ -221,6 +222,8 @@

     short *in, *out;

     int application=OPUS_APPLICATION_AUDIO;

     double bits=0.0, bits_max=0.0, bits_act=0.0, bits2=0.0, nrg;

+    double tot_samples=0;

+    opus_uint64 tot_in, tot_out;

     int bandwidth=-1;

     const char *bandwidth_string;

     int lost = 0, lost_prev = 1;

@@ -239,6 +242,10 @@

     int curr_mode=0;

     int curr_mode_count=0;

     int mode_switch_time = 48000;

+    int nb_encoded;

+    int remaining=0;

+    int variable_duration=OPUS_FRAMESIZE_ARG;

+    int delayed_decision=0;

     if (argc < 5 )

@@ -246,6 +253,7 @@

        return EXIT_FAILURE;

+    tot_in=tot_out=0;

     fprintf(stderr, "%s\n", opus_get_version_string());

     args = 1;

@@ -306,7 +314,7 @@

     forcechannels = OPUS_AUTO;

     use_dtx = 0;

     packet_loss_perc = 0;

-    max_frame_size = 960*6;

+    max_frame_size = 2*48000;

     curr_read=0;

     while( args < argc - 2 ) {

@@ -374,6 +382,14 @@

             check_encoder_option(decode_only, "-cvbr");

             cvbr = 1;

             args++;

+        } else if( strcmp( argv[ args ], "-variable-duration" ) == 0 ) {

+            check_encoder_option(decode_only, "-variable-duration");

+            variable_duration = OPUS_FRAMESIZE_VARIABLE;

+            args++;

+        } else if( strcmp( argv[ args ], "-delayed-decision" ) == 0 ) {

+            check_encoder_option(decode_only, "-delayed-decision");

+            delayed_decision = 1;

+            args++;

         } else if( strcmp( argv[ args ], "-dtx") == 0 ) {

             check_encoder_option(decode_only, "-dtx");

             use_dtx = 1;

@@ -499,6 +515,7 @@

        opus_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&skip));

        opus_encoder_ctl(enc, OPUS_SET_LSB_DEPTH(16));

+       opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration));

     if (!encode_only)

@@ -554,6 +571,26 @@

     if ( use_inbandfec ) {

         data[1] = (unsigned char*)calloc(max_payload_bytes,sizeof(char));

+    if(delayed_decision)

+    {

+       if (variable_duration!=OPUS_FRAMESIZE_VARIABLE)

+       {

+          if (frame_size==sampling_rate/400)

+             variable_duration = OPUS_FRAMESIZE_2_5_MS;

+          else if (frame_size==sampling_rate/200)

+             variable_duration = OPUS_FRAMESIZE_5_MS;

+          else if (frame_size==sampling_rate/100)

+             variable_duration = OPUS_FRAMESIZE_10_MS;

+          else if (frame_size==sampling_rate/50)

+             variable_duration = OPUS_FRAMESIZE_20_MS;

+          else if (frame_size==sampling_rate/25)

+             variable_duration = OPUS_FRAMESIZE_40_MS;

+          else

+             variable_duration = OPUS_FRAMESIZE_60_MS;

+          opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration));

+       }

+       frame_size = 2*48000;

+    }

     while (!stop)

         if (delayed_celt)

@@ -617,22 +654,28 @@

                 opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3]));

                 frame_size = mode_list[curr_mode][2];

-            err = fread(fbytes, sizeof(short)*channels, frame_size, fin);

+            err = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin);

             curr_read = err;

+            tot_in += curr_read;

             for(i=0;i<curr_read*channels;i++)

                 opus_int32 s;

                 s=fbytes[2*i+1]<<8|fbytes[2*i];

                 s=((s&0xFFFF)^0x8000)-0x8000;

-                in[i]=s;

+                in[i+remaining*channels]=s;

-            if (curr_read < frame_size)

+            if (curr_read+remaining < frame_size)

-                for (i=curr_read*channels;i<frame_size*channels;i++)

+                for (i=(curr_read+remaining)*channels;i<frame_size*channels;i++)

                    in[i] = 0;

-                stop = 1;

+                if (encode_only || decode_only)

+                   stop = 1;

             len[toggle] = opus_encode(enc, in, frame_size, data[toggle], max_payload_bytes);

+            nb_encoded = opus_packet_get_samples_per_frame(data[toggle], sampling_rate)*opus_packet_get_nb_frames(data[toggle], len[toggle]);

+            remaining = frame_size-nb_encoded;

+            for(i=0;i<remaining*channels;i++)

+               in[i] = in[nb_encoded*channels+i];

             if (sweep_bps!=0)

                bitrate_bps += sweep_bps;

@@ -681,6 +724,7 @@

                fprintf(stderr, "Error writing.\n");

                return EXIT_FAILURE;

+            tot_samples += nb_encoded;

         } else {

             int output_samples;

             lost = len[toggle]==0 || (packet_loss_perc>0 && rand()%100 < packet_loss_perc);

@@ -703,6 +747,11 @@

                 if (output_samples>0)

+                    if (!decode_only && tot_out + output_samples > tot_in)

+                    {

+                       stop=1;

+                       output_samples  = tot_in-tot_out;

+                    }

                     if (output_samples>skip) {

                        int i;

                        for(i=0;i<(output_samples-skip)*channels;i++)

@@ -716,6 +765,7 @@

                           fprintf(stderr, "Error writing.\n");

                           return EXIT_FAILURE;

+                       tot_out += output_samples-skip;

                     if (output_samples<skip) skip -= output_samples;

                     else skip = 0;

@@ -723,6 +773,7 @@

                    fprintf(stderr, "error decoding frame: %s\n",

                                    opus_strerror(output_samples));

+                tot_samples += output_samples;

@@ -767,7 +818,7 @@

         toggle = (toggle + use_inbandfec) & 1;

     fprintf (stderr, "average bitrate:             %7.3f kb/s\n",

-                     1e-3*bits*sampling_rate/(frame_size*(double)count));

+                     1e-3*bits*sampling_rate/tot_samples);

     fprintf (stderr, "maximum bitrate:             %7.3f kb/s\n",

                      1e-3*bits_max*sampling_rate/frame_size);

     if (!decode_only)

--- a/src/opus_encoder.c

+++ b/src/opus_encoder.c

@@ -67,6 +67,7 @@

     opus_int32   Fs;

     int          use_vbr;

     int          vbr_constraint;

+    int          variable_duration;

     opus_int32   bitrate_bps;

     opus_int32   user_bitrate_bps;

     int          lsb_depth;

@@ -89,7 +90,8 @@

     opus_val16   delay_buffer[MAX_ENCODER_BUFFER*2];

 #ifndef FIXED_POINT

     TonalityAnalysisState analysis;

-    int                   detected_bandwidth;

+    int          detected_bandwidth;

+    int          analysis_offset;

 #endif

     opus_uint32  rangeFinal;

};

@@ -213,6 +215,7 @@

     st->voice_ratio = -1;

     st->encoder_buffer = st->Fs/100;

     st->lsb_depth = 24;

+    st->variable_duration = OPUS_FRAMESIZE_ARG;

     /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead

        + 1.5 ms for SILK resamplers and stereo prediction) */

@@ -535,8 +538,258 @@

     return st->user_bitrate_bps;

+#ifndef FIXED_POINT

+/* Don't use more than 60 ms for the frame size analysis */

+#define MAX_DYNAMIC_FRAMESIZE 24

+/* Estimates how much the bitrate will be boosted based on the sub-frame energy.
+   E holds sub-frame energies and E_1 the matching inverse energies; the
+   estimate is derived from mean(E)*mean(E_1) over the first
+   min(maxM, (1<<LM)+1) entries. */
+static float transient_boost(const float *E, const float *E_1, int LM, int maxM)
+{
+   float energy_total = 0;
+   float inv_energy_total = 0;
+   float metric;
+   int count;
+   int k;
+   /* One entry more than 1<<LM, but never more than maxM entries */
+   count = IMIN(maxM, (1<<LM)+1);
+   k = 0;
+   while (k < count)
+   {
+      energy_total += E[k];
+      inv_energy_total += E_1[k];
+      k++;
+   }
+   /* Product of the two sums, normalised by the squared entry count */
+   metric = energy_total*inv_energy_total/(count*count);
+   return MIN16(1,sqrt(MAX16(0,.05*(metric-2))));
+}

+/* Viterbi decoding trying to find the best frame size combination using look-ahead
+   State numbering (a frame of index k starts in state 1<<k and finishes in
+   state (2<<k)-1, advancing by one state per analysed sub-frame):
+    0: unused
+    1:  2.5 ms
+    2:  5 ms (#1)
+    3:  5 ms (#2)
+    4: 10 ms (#1)
+    5: 10 ms (#2)
+    6: 10 ms (#3)
+    7: 10 ms (#4)
+    8: 20 ms (#1)
+    9: 20 ms (#2)
+   10: 20 ms (#3)
+   11: 20 ms (#4)
+   12: 20 ms (#5)
+   13: 20 ms (#6)
+   14: 20 ms (#7)
+   15: 20 ms (#8)
+   E and E_1 are the sub-frame energies and inverse energies handed to
+   transient_boost(); N is the number of sub-frames to analyse and must not
+   exceed MAX_DYNAMIC_FRAMESIZE (not checked here).
+   Returns the frame size index (0..3) selected for the first frame.
+*/
+static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate)
+{
+   int i;
+   float cost[MAX_DYNAMIC_FRAMESIZE][16];
+   int states[MAX_DYNAMIC_FRAMESIZE][16];
+   float best_cost;
+   int best_state;
+   /* With nothing to analyse, cost[N-1] below would index before the array;
+      fall back to the shortest frame size instead */
+   if (N < 1)
+      return 0;
+   /* rate is a divisor below: a zero value (bitrate under 400) would divide
+      by zero and a negative one would make the frame cost negative */
+   if (rate < 1)
+      rate = 1;
+   /* Makes variable framesize less aggressive at lower bitrates, but I can't
+      find any valid theoretical justification for this (other than it seems
+      to help) */
+   /* NOTE(review): 720/rate is an integer division, so the factor is a step
+      function and becomes 0 once rate exceeds 720 -- confirm this is intended */
+   frame_cost *= 720/rate;
+   for (i=0;i<16;i++)
+   {
+      /* Impossible state */
+      states[0][i] = -1;
+      cost[0][i] = 1e10;
+   }
+   /* At the first sub-frame only the "frame start" states are reachable */
+   for (i=0;i<4;i++)
+   {
+      cost[0][1<<i] = frame_cost + rate*(1<<i)*transient_boost(E, E_1, i, N+1);
+      states[0][1<<i] = i;
+   }
+   for (i=1;i<N;i++)
+   {
+      int j;
+      /* Follow continuations */
+      for (j=2;j<16;j++)
+      {
+         cost[i][j] = cost[i-1][j-1];
+         states[i][j] = j-1;
+      }
+      /* New frames (overwrites the continuation entries for states 2, 4 and 8) */
+      for(j=0;j<4;j++)
+      {
+         int k;
+         float min_cost;
+         float curr_cost;
+         /* Cheapest predecessor among the "frame end" states 1, 3, 7 and 15 */
+         states[i][1<<j] = 1;
+         min_cost = cost[i-1][1];
+         for(k=1;k<4;k++)
+         {
+            float tmp = cost[i-1][(1<<(k+1))-1];
+            if (tmp < min_cost)
+            {
+               states[i][1<<j] = (1<<(k+1))-1;
+               min_cost = tmp;
+            }
+         }
+         curr_cost = frame_cost+rate*(1<<j)*transient_boost(E+i, E_1+i, j, N-i+1);
+         cost[i][1<<j] = min_cost;
+         /* If part of the frame is outside the analysis window, only count part of the cost */
+         if (N-i < (1<<j))
+            cost[i][1<<j] += curr_cost*(float)(N-i)/(1<<j);
+         else
+            cost[i][1<<j] += curr_cost;
+      }
+   }
+   best_state=1;
+   best_cost = cost[N-1][1];
+   /* Find best end state (doesn't force a frame to end at N-1) */
+   for (i=2;i<16;i++)
+   {
+      if (cost[N-1][i]<best_cost)
+      {
+         best_cost = cost[N-1][i];
+         best_state = i;
+      }
+   }
+   /* Follow transitions back; the final lookup in states[0] yields the frame
+      size index that was stored for the first frame */
+   for (i=N-1;i>=0;i--)
+   {
+      best_state = states[i][best_state];
+   }
+   return best_state;
+}

+/* Sums C interleaved float channels into a single channel.
+   _x is read as float samples; for each j in [0, subframe), sub[j] receives
+   the sum over all channels of sample index (j+offset). */
+void downmix_float(const void *_x, float *sub, int subframe, int offset, int C)
+{
+   const float *first;
+   int ch;
+   int n;
+   /* Channel 0 of the first sample to be mixed */
+   first = (const float *)_x + offset*C;
+   /* Seed the output with channel 0 ... */
+   for (n=0;n<subframe;n++)
+      sub[n] = first[n*C];
+   /* ... then accumulate the remaining channels one at a time */
+   for (ch=1;ch<C;ch++)
+   {
+      for (n=0;n<subframe;n++)
+         sub[n] += first[n*C+ch];
+   }
+}

+/* Sums C interleaved 16-bit integer channels into a single float channel.
+   _x is read as opus_int16 samples; for each j in [0, subframe), sub[j]
+   receives the sum over all channels of sample index (j+offset). No scaling
+   is applied to the integer values. */
+void downmix_int(const void *_x, float *sub, int subframe, int offset, int C)
+{
+   const opus_int16 *first;
+   int ch;
+   int n;
+   /* Channel 0 of the first sample to be mixed */
+   first = (const opus_int16 *)_x + offset*C;
+   /* Seed the output with channel 0 ... */
+   for (n=0;n<subframe;n++)
+      sub[n] = first[n*C];
+   /* ... then accumulate the remaining channels one at a time */
+   for (ch=1;ch<C;ch++)
+   {
+      for (n=0;n<subframe;n++)
+         sub[n] += first[n*C+ch];
+   }
+}

+/* Picks a frame size index by measuring the energy of each Fs/400-sample
+   (2.5 ms) sub-frame and running transient_viterbi() on the result.
+   x         interleaved input PCM; it is only ever read through downmix(), so
+             its real sample type is whatever that callback expects
+   len       number of samples per channel available in x
+   C         number of interleaved channels
+   Fs        sampling rate in Hz
+   bitrate   bitrate; bitrate/400 is passed on as the Viterbi rate
+   tonality  scales the per-frame cost by (1+.5*tonality)
+   mem       sub-frame energies carried over between calls: mem[0] always,
+             mem[1] and mem[2] only when buffering is non-zero (read and updated)
+   buffering non-zero to skip 2*subframe-buffering samples and use the extra memory
+   downmix   callback that mixes one sub-frame of x down to mono floats
+   Returns the value produced by transient_viterbi(). */
+int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
+                int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,
+                downmix_func downmix)
+{
+   int N;
+   int i;
+   float e[MAX_DYNAMIC_FRAMESIZE+4];
+   float e_1[MAX_DYNAMIC_FRAMESIZE+3];
+   float memx;
+   int bestLM=0;
+   int subframe;
+   int pos;
+   int offset=0;
+   VARDECL(opus_val16, sub);
+   subframe = Fs/400;
+   ALLOC(sub, subframe, opus_val16);
+   e[0]=mem[0];
+   e_1[0]=1./(EPSILON+mem[0]);
+   if (buffering)
+   {
+      /* Consider the CELT delay when not in restricted-lowdelay */
+      /* We assume the buffering is between 2.5 and 5 ms */
+      offset = 2*subframe - buffering;
+      celt_assert(offset>=0 && offset <= subframe);
+      /* x is deliberately not advanced here: the buffer may hold opus_int16
+         samples (downmix_int), so stepping an opus_val16 pointer would skip
+         the wrong number of samples. The offset goes to downmix() instead. */
+      len -= offset;
+      e[1]=mem[1];
+      e_1[1]=1./(EPSILON+mem[1]);
+      e[2]=mem[2];
+      e_1[2]=1./(EPSILON+mem[2]);
+      pos = 3;
+   } else {
+      pos=1;
+   }
+   N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE);
+   /* Only here to have a defined value; the real initial value is taken from
+      the first down-mixed sample when i==0. x must not be dereferenced
+      directly because its element type is not known here. */
+   memx = 0;
+   for (i=0;i<N;i++)
+   {
+      float tmp;
+      float tmpx;
+      int j;
+      tmp=EPSILON;
+      downmix(x, sub, subframe, i*subframe+offset, C);
+      if (i==0)
+         memx = sub[0];
+      /* Energy of the first-order difference of the down-mixed signal */
+      for (j=0;j<subframe;j++)
+      {
+         tmpx = sub[j];
+         tmp += (tmpx-memx)*(tmpx-memx);
+         memx = tmpx;
+      }
+      e[i+pos] = tmp;
+      e_1[i+pos] = 1.f/tmp;
+   }
+   /* Hack to get 20 ms working with APPLICATION_AUDIO
+      The real problem is that the corresponding memory needs to use 1.5 ms
+      from this frame and 1 ms from the next frame */
+   e[i+pos] = e[i+pos-1];
+   if (buffering)
+      N=IMIN(MAX_DYNAMIC_FRAMESIZE, N+2);
+   bestLM = transient_viterbi(e, e_1, N, (1.f+.5*tonality)*(40*C+40), bitrate/400);
+   /* Keep the energies at the start of the next frame for the next call */
+   mem[0] = e[1<<bestLM];
+   if (buffering)
+   {
+      mem[1] = e[(1<<bestLM)+1];
+      mem[2] = e[(1<<bestLM)+2];
+   }
+   return bestLM;
+}

+#endif

+/* Resolves the frame size to encode from the caller-supplied frame_size and
+   the variable_duration setting.
+   Returns the selected size in samples, or -1 when the input is shorter than
+   Fs/400 samples, variable_duration is not a recognised value, the selected
+   size exceeds frame_size, or the selected size is not 1/400, 1/200, 1/100,
+   1/50, 1/25 or 3/50 of Fs. */
+opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
+{
+   int chosen;
+   /* Less than 2.5 ms of input can never be encoded */
+   if (frame_size<Fs/400)
+      return -1;
+   if (variable_duration == OPUS_FRAMESIZE_ARG)
+   {
+      /* Use exactly what the caller passed in */
+      chosen = frame_size;
+   } else if (variable_duration == OPUS_FRAMESIZE_VARIABLE)
+   {
+      chosen = Fs/50;
+   } else if (variable_duration < OPUS_FRAMESIZE_2_5_MS || variable_duration > OPUS_FRAMESIZE_60_MS)
+   {
+      return -1;
+   } else {
+      /* Each step above OPUS_FRAMESIZE_2_5_MS doubles the size, capped at 3*Fs/50 */
+      chosen = IMIN(3*Fs/50, (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS));
+   }
+   /* Cannot encode more samples than were provided */
+   if (chosen>frame_size)
+      return -1;
+   if (400*chosen==Fs || 200*chosen==Fs || 100*chosen==Fs ||
+            50*chosen==Fs || 25*chosen==Fs || 50*chosen==3*Fs)
+      return chosen;
+   return -1;
+}

 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,

-                unsigned char *data, opus_int32 out_data_bytes, int lsb_depth)

+                unsigned char *data, opus_int32 out_data_bytes, int lsb_depth

+#ifndef FIXED_POINT

+                , AnalysisInfo *analysis_info

+#endif

+                )

     void *silk_enc;

     CELTEncoder *celt_enc;

@@ -563,11 +816,7 @@

     int curr_bandwidth;

     opus_val16 HB_gain;

     opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */

-    int extra_buffer, total_buffer;

-    int perform_analysis=0;

-#ifndef FIXED_POINT

-    AnalysisInfo analysis_info;

-#endif

+    int total_buffer;

     VARDECL(opus_val16, tmp_prefill);

     ALLOC_STACK;

@@ -575,36 +824,37 @@

     max_data_bytes = IMIN(1276, out_data_bytes);

     st->rangeFinal = 0;

-    if (400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs &&

+    if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs &&

          50*frame_size != st->Fs &&  25*frame_size != st->Fs &&  50*frame_size != 3*st->Fs)

+         || (400*frame_size < st->Fs)

+         || max_data_bytes<=0

+         )

        RESTORE_STACK;

        return OPUS_BAD_ARG;

-    if (max_data_bytes<=0)

-    {

-       RESTORE_STACK;

-       return OPUS_BAD_ARG;

-    }

     silk_enc = (char*)st+st->silk_enc_offset;

     celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);

-    lsb_depth = IMIN(lsb_depth, st->lsb_depth);

-#ifndef FIXED_POINT

-    perform_analysis = st->silk_mode.complexity >= 7 && frame_size >= st->Fs/100 && st->Fs==48000;

-#endif

     if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)

        delay_compensation = 0;

     else

        delay_compensation = st->delay_compensation;

-    if (perform_analysis)

+    lsb_depth = IMIN(lsb_depth, st->lsb_depth);

+    st->voice_ratio = -1;

+#ifndef FIXED_POINT

+    st->detected_bandwidth = 0;

+    if (analysis_info->valid)

-       total_buffer = IMAX(st->Fs/200, delay_compensation);

-    } else {

-       total_buffer = delay_compensation;

+       if (st->signal_type == OPUS_AUTO)

+          st->voice_ratio = (int)floor(.5+100*(1-analysis_info->music_prob));

+       st->detected_bandwidth = analysis_info->opus_bandwidth;

-    extra_buffer = total_buffer-delay_compensation;

+#endif

+    total_buffer = delay_compensation;

     st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);

     frame_rate = st->Fs/frame_size;

@@ -916,7 +1166,11 @@

           /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */

           if (to_celt && i==nb_frames-1)

              st->user_forced_mode = MODE_CELT_ONLY;

-          tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth);

+          tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth

+#ifndef FIXED_POINT

+                , analysis_info

+#endif

+                );

           if (tmp_len<0)

              RESTORE_STACK;

@@ -942,7 +1196,6 @@

        RESTORE_STACK;

        return ret;

     curr_bandwidth = st->bandwidth;

     /* Chooses the appropriate mode for speech

@@ -981,23 +1234,8 @@

        dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);

-#ifndef FIXED_POINT

-    if (perform_analysis)

-    {

-       int nb_analysis_frames;

-       nb_analysis_frames = frame_size/(st->Fs/100);

-       for (i=0;i<nb_analysis_frames;i++)

-          tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm_buf+i*(st->Fs/100)*st->channels, st->channels, lsb_depth);

-       if (st->signal_type == OPUS_AUTO)

-          st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));

-       st->detected_bandwidth = analysis_info.opus_bandwidth;

-    } else {

-       analysis_info.valid = 0;

-       st->voice_ratio = -1;

-       st->detected_bandwidth = 0;

-    }

-#endif

     /* SILK processing */

     HB_gain = Q15ONE;

     if (st->mode != MODE_CELT_ONLY)

@@ -1205,9 +1443,18 @@

         } else {

             if (st->use_vbr)

+                opus_int32 bonus=0;

+#ifndef FIXED_POINT

+                if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50)

+                {

+                   bonus = (40*st->stream_channels+40)*(st->Fs/frame_size-50);

+                   if (analysis_info->valid)

+                      bonus = bonus*(1.f+.5*analysis_info->tonality);

+                }

+#endif

                 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));

                 celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));

-                celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps));

+                celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus));

                 nb_compr_bytes = max_data_bytes-1-redundancy_bytes;

             } else {

                 nb_compr_bytes = bytes_target;

@@ -1222,7 +1469,7 @@

     if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)

        for (i=0;i<st->channels*st->Fs/400;i++)

-          tmp_prefill[i] = st->delay_buffer[(extra_buffer+st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];

+          tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];

     for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++)

@@ -1236,7 +1483,7 @@

        const CELTMode *celt_mode;

        celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));

-       gain_fade(pcm_buf+extra_buffer*st->channels, pcm_buf+extra_buffer*st->channels,

+       gain_fade(pcm_buf, pcm_buf,

              st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs);

     st->prev_HB_gain = HB_gain;

@@ -1258,7 +1505,7 @@

             g1 *= (1.f/16384);

             g2 *= (1.f/16384);

 #endif

-            stereo_fade(pcm_buf+extra_buffer*st->channels, pcm_buf+extra_buffer*st->channels, g1, g2, celt_mode->overlap,

+            stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap,

                   frame_size, st->channels, celt_mode->window, st->Fs);

             st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14;

@@ -1312,7 +1559,7 @@

         int err;

         celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));

         celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));

-        err = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);

+        err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);

         if (err < 0)

            RESTORE_STACK;

@@ -1339,10 +1586,9 @@

         if (ec_tell(&enc) <= 8*nb_compr_bytes)

 #ifndef FIXED_POINT

-           if (perform_analysis)

-              celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));

+           celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(analysis_info));

 #endif

-           ret = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, frame_size, NULL, nb_compr_bytes, &enc);

+           ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc);

            if (ret < 0)

               RESTORE_STACK;

@@ -1365,9 +1611,9 @@

         celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));

         /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */

-        celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2-N4), N4, dummy, 2, NULL);

+        celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL);

-        err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);

+        err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);

         if (err < 0)

            RESTORE_STACK;

@@ -1440,6 +1686,7 @@

    VARDECL(opus_int16, in);

    ALLOC_STACK;

+   frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);

    if(frame_size<0)

       RESTORE_STACK;

@@ -1459,6 +1706,12 @@

 opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size,

                 unsigned char *data, opus_int32 out_data_bytes)

+   frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);

+   if(frame_size<0)

+   {

+      RESTORE_STACK;

+      return OPUS_BAD_ARG;

+   }

    return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16);

@@ -1467,14 +1720,40 @@

       unsigned char *data, opus_int32 max_data_bytes)

    int i, ret;

+   const CELTMode *celt_mode;

+   int delay_compensation;

+   int lsb_depth;

    VARDECL(float, in);

+   AnalysisInfo analysis_info;

    ALLOC_STACK;

+   opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode));

+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)

+      delay_compensation = 0;

+   else

+      delay_compensation = st->delay_compensation;

+   lsb_depth = IMIN(16, st->lsb_depth);

+   analysis_info.valid = 0;

+   if (st->silk_mode.complexity >= 7 && st->Fs==48000)

+   {

+      frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset,

+            frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_int, &analysis_info);

+   } else {

+      frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);

+   }

+   if(frame_size<0)

+   {

+      RESTORE_STACK;

+      return OPUS_BAD_ARG;

+   }

    ALLOC(in, frame_size*st->channels, float);

    for (i=0;i<frame_size*st->channels;i++)

       in[i] = (1.0f/32768)*pcm[i];

-   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16);

+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, &analysis_info);

    RESTORE_STACK;

    return ret;

@@ -1481,8 +1760,35 @@

 opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,

                       unsigned char *data, opus_int32 out_data_bytes)

-   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24);

+   const CELTMode *celt_mode;

+   int delay_compensation;

+   int lsb_depth;

+   AnalysisInfo analysis_info;

+   opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode));

+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)

+      delay_compensation = 0;

+   else

+      delay_compensation = st->delay_compensation;

+   lsb_depth = IMIN(24, st->lsb_depth);

+   analysis_info.valid = 0;

+   if (st->silk_mode.complexity >= 7 && st->Fs==48000)

+   {

+      frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset,

+            frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_float, &analysis_info);

+   } else {

+      frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);

+   }

+   if(frame_size<0)

+   {

+      RESTORE_STACK;

+      return OPUS_BAD_ARG;

+   }

+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, &analysis_info);

 #endif

@@ -1750,6 +2056,18 @@

             *value = st->lsb_depth;

         break;

+        case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:

+        {

+            opus_int32 value = va_arg(ap, opus_int32);

+            st->variable_duration = value;

+        }

+        break;

+        case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:

+        {

+            opus_int32 *value = va_arg(ap, opus_int32*);

+            *value = st->variable_duration;

+        }

+        break;

         case OPUS_RESET_STATE:

            void *silk_enc;

@@ -1777,6 +2095,15 @@

             if ((value < MODE_SILK_ONLY || value > MODE_CELT_ONLY) && value != OPUS_AUTO)

                goto bad_arg;

             st->user_forced_mode = value;

+        }

+        break;

+        case CELT_GET_MODE_REQUEST:

+        {

+           const CELTMode ** value = va_arg(ap, const CELTMode**);

+           if (value==0)

+              goto bad_arg;

+           celt_encoder_ctl(celt_enc, CELT_GET_MODE(value));

         break;

         default:

--- a/src/opus_multistream_encoder.c

+++ b/src/opus_multistream_encoder.c

@@ -36,10 +36,14 @@

 #include <stdarg.h>

 #include "float_cast.h"

 #include "os_support.h"

+#include "analysis.h"

 struct OpusMSEncoder {

+   TonalityAnalysisState analysis;

    ChannelLayout layout;

-   int bitrate;

+   int variable_duration;

+   opus_int32 bitrate_bps;

+   opus_val32 subframe_mem[3];

    /* Encoder states go here */

};

@@ -102,6 +106,8 @@

    st->layout.nb_streams = streams;

    st->layout.nb_coupled_streams = coupled_streams;

+   st->bitrate_bps = OPUS_AUTO;

+   st->variable_duration = OPUS_FRAMESIZE_ARG;

    for (i=0;i<st->layout.nb_channels;i++)

       st->layout.mapping[i] = mapping[i];

    if (!validate_layout(&st->layout) || !validate_encoder_layout(&st->layout))

@@ -182,6 +188,10 @@

     unsigned char *data,

     opus_int32 max_data_bytes,

     int lsb_depth

+#ifndef FIXED_POINT

+    , downmix_func downmix

+    , const void *pcm_analysis

+#endif

    opus_int32 Fs;

@@ -193,10 +203,43 @@

    VARDECL(opus_val16, buf);

    unsigned char tmp_data[MS_FRAME_TMP];

    OpusRepacketizer rp;

+   int orig_frame_size;

+   int coded_channels;

+   opus_int32 channel_rate;

+   opus_int32 complexity;

+   AnalysisInfo analysis_info;

+   const CELTMode *celt_mode;

    ALLOC_STACK;

    ptr = (char*)st + align(sizeof(OpusMSEncoder));

    opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));

+   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_COMPLEXITY(&complexity));

+   opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode));

+   if (400*frame_size < Fs)

+   {

+      RESTORE_STACK;

+      return OPUS_BAD_ARG;

+   }

+   orig_frame_size = IMIN(frame_size,Fs/50);

+#ifndef FIXED_POINT

+   analysis_info.valid = 0;

+   if (complexity >= 7 && Fs==48000)

+   {

+      opus_int32 delay_compensation;

+      int channels;

+      channels = st->layout.nb_streams + st->layout.nb_coupled_streams;

+      opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation));

+      delay_compensation -= Fs/400;

+      frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm_analysis,

+            frame_size, st->variable_duration, channels, Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix, &analysis_info);

+   } else

+#endif

+   {

+      frame_size = frame_size_select(frame_size, st->variable_duration, Fs);

+   }

    /* Validate frame_size before using it to allocate stack space.

       This mirrors the checks in opus_encode[_float](). */

    if (400*frame_size != Fs && 200*frame_size != Fs &&

@@ -215,6 +258,39 @@

       RESTORE_STACK;

       return OPUS_BUFFER_TOO_SMALL;

+   /* Compute bitrate allocation between streams (this could be a lot better) */

+   coded_channels = st->layout.nb_streams + st->layout.nb_coupled_streams;

+   if (st->bitrate_bps==OPUS_AUTO)

+   {

+      channel_rate = Fs+60*Fs/orig_frame_size;

+   } else if (st->bitrate_bps==OPUS_BITRATE_MAX)

+   {

+      channel_rate = 300000;

+   } else {

+      channel_rate = st->bitrate_bps/coded_channels;

+   }

+#ifndef FIXED_POINT

+   if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50)

+   {

+      opus_int32 bonus;

+      bonus = 60*(Fs/frame_size-50);

+      channel_rate += bonus;

+   }

+#endif

+   ptr = (char*)st + align(sizeof(OpusMSEncoder));

+   for (s=0;s<st->layout.nb_streams;s++)

+   {

+      OpusEncoder *enc;

+      enc = (OpusEncoder*)ptr;

+      if (s < st->layout.nb_coupled_streams)

+         ptr += align(coupled_size);

+      else

+         ptr += align(mono_size);

+      opus_encoder_ctl(enc, OPUS_SET_BITRATE(channel_rate * (s < st->layout.nb_coupled_streams ? 2 : 1)));

+   }

+   ptr = (char*)st + align(sizeof(OpusMSEncoder));

    /* Counting ToC */

    tot_size = 0;

    for (s=0;s<st->layout.nb_streams;s++)

@@ -246,7 +322,11 @@

       /* Reserve three bytes for the last stream and four for the others */

       curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1);

       curr_max = IMIN(curr_max,MS_FRAME_TMP);

-      len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth);

+      len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth

+#ifndef FIXED_POINT

+            , &analysis_info

+#endif

+            );

       if (len<0)

          RESTORE_STACK;

@@ -345,8 +425,9 @@

     opus_int32 max_data_bytes

+   int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;

    return opus_multistream_encode_native(st, opus_copy_channel_in_float,

-      pcm, frame_size, data, max_data_bytes, 24);

+      pcm, frame_size, data, max_data_bytes, 24, downmix_float, pcm+channels*st->analysis.analysis_offset);

 int opus_multistream_encode(

@@ -357,8 +438,9 @@

     opus_int32 max_data_bytes

+   int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;

    return opus_multistream_encode_native(st, opus_copy_channel_in_short,

-      pcm, frame_size, data, max_data_bytes, 16);

+      pcm, frame_size, data, max_data_bytes, 16, downmix_int, pcm+channels*st->analysis.analysis_offset);

 #endif

@@ -378,20 +460,10 @@

    case OPUS_SET_BITRATE_REQUEST:

-      int chan, s;

       opus_int32 value = va_arg(ap, opus_int32);

-      chan = st->layout.nb_streams + st->layout.nb_coupled_streams;

-      value /= chan;

-      for (s=0;s<st->layout.nb_streams;s++)

-      {

-         OpusEncoder *enc;

-         enc = (OpusEncoder*)ptr;

-         if (s < st->layout.nb_coupled_streams)

-            ptr += align(coupled_size);

-         else

-            ptr += align(mono_size);

-         opus_encoder_ctl(enc, request, value * (s < st->layout.nb_coupled_streams ? 2 : 1));

-      }

+      if (value<0 && value!=OPUS_AUTO && value!=OPUS_BITRATE_MAX)

+         goto bad_arg;

+      st->bitrate_bps = value;

    break;

    case OPUS_GET_BITRATE_REQUEST:

@@ -504,7 +576,21 @@

       *value = (OpusEncoder*)ptr;

-      break;

+   break;

+   case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:

+   {

+       opus_int32 value = va_arg(ap, opus_int32);

+       if (value<0 || value>1)

+          goto bad_arg;

+       st->variable_duration = value;

+   }

+   break;

+   case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:

+   {

+       opus_int32 *value = va_arg(ap, opus_int32*);

+       *value = st->variable_duration;

+   }

+   break;

    default:

       ret = OPUS_UNIMPLEMENTED;

       break;

@@ -512,6 +598,9 @@

    va_end(ap);

    return ret;

+bad_arg:

+   va_end(ap);

+   return OPUS_BAD_ARG;

 void opus_multistream_encoder_destroy(OpusMSEncoder *st)

--- a/src/opus_private.h

+++ b/src/opus_private.h

@@ -31,6 +31,7 @@

 #include "arch.h"

 #include "opus.h"

+#include "celt.h"

 struct OpusRepacketizer {

    unsigned char toc;

@@ -81,11 +82,24 @@

 #define OPUS_SET_FORCE_MODE_REQUEST    11002

 #define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x)

+typedef void (*downmix_func)(const void *, float *, int, int, int);

+void downmix_float(const void *_x, float *sub, int subframe, int offset, int C);

+void downmix_int(const void *_x, float *sub, int subframe, int offset, int C);

+int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,

+                int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,

+                downmix_func downmix);

 int encode_size(int size, unsigned char *data);

+opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs);

 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,

-      unsigned char *data, opus_int32 out_data_bytes, int lsb_depth);

+      unsigned char *data, opus_int32 out_data_bytes, int lsb_depth

+#ifndef FIXED_POINT

+                , AnalysisInfo *analysis_info

+#endif

+      );

 int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,

       opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,