shithub: opus

--- a/celt/celt_encoder.c

+++ b/celt/celt_encoder.c

@@ -1274,7 +1274,7 @@

       prefilter_tapset = st->tapset_decision;

       pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);

-      if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && st->analysis.tonality > .3

+      if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)

             && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))

          pitch_change = 1;

       if (pf_on==0)

--- a/src/analysis.c

+++ b/src/analysis.c

@@ -139,7 +139,7 @@

-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth)

+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int len, int C, int lsb_depth)

     int i, b;

     const CELTMode *mode;

@@ -170,6 +170,8 @@

     int bandwidth=0;

     float maxE = 0;

     float noise_floor;

+    int remaining;

     celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode));

     tonal->last_transition++;

@@ -180,28 +182,43 @@

     if (tonal->count<4)

        tonal->music_prob = .5;

     kfft = mode->mdct.kfft[0];

+    if (tonal->count==0)

+       tonal->mem_fill = 240;

     if (C==1)

-       for (i=0;i<N2;i++)

-       {

-          float w = analysis_window[i];

-          in[i].r = MULT16_16(w, tonal->inmem[i]);

-          in[i].i = MULT16_16(w, x[i]);

-          in[N-i-1].r = MULT16_16(w, x[N2-i-1]);

-          in[N-i-1].i = MULT16_16(w, x[N-i-1]);

-          tonal->inmem[i] = x[N2+i];

-       }

+       for (i=0;i<IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill);i++)

+          tonal->inmem[i+tonal->mem_fill] = x[i];

     } else {

-       for (i=0;i<N2;i++)

-       {

-          float w = analysis_window[i];

-          in[i].r = MULT16_16(w, tonal->inmem[i]);

-          in[i].i = MULT16_16(w, x[2*i]+x[2*i+1]);

-          in[N-i-1].r = MULT16_16(w, x[2*(N2-i-1)]+x[2*(N2-i-1)+1]);

-          in[N-i-1].i = MULT16_16(w, x[2*(N-i-1)]+x[2*(N-i-1)+1]);

-          tonal->inmem[i] = x[2*(N2+i)]+x[2*(N2+i)+1];

-       }

+       for (i=0;i<IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill);i++)

+          tonal->inmem[i+tonal->mem_fill] = x[2*i]+x[2*i+1];

+    }

+    if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)

+    {

+       tonal->mem_fill += len;

+       /* Not enough samples buffered yet to update the analysis */

+       return;

+    }

+    for (i=0;i<N2;i++)

+    {

+       float w = analysis_window[i];

+       in[i].r = MULT16_16(w, tonal->inmem[i]);

+       in[i].i = MULT16_16(w, tonal->inmem[N2+i]);

+       in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);

+       in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);

+    OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);

+    remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);

+    if (C==1)

+    {

+       for (i=0;i<remaining;i++)

+          tonal->inmem[240+i] = x[ANALYSIS_BUF_SIZE-tonal->mem_fill+i];

+    } else {

+       for (i=0;i<remaining;i++)

+          tonal->inmem[240+i] = x[2*(ANALYSIS_BUF_SIZE-tonal->mem_fill+i)]

+                              + x[2*(ANALYSIS_BUF_SIZE-tonal->mem_fill+i)+1];

+    }

+    tonal->mem_fill = 240 + remaining;

     opus_fft(kfft, in, out);

     for (i=1;i<N2;i++)

--- a/src/analysis.h

+++ b/src/analysis.h

@@ -31,16 +31,18 @@

 #define NB_FRAMES 8

 #define NB_TBANDS 18

 #define NB_TOT_BANDS 21

+#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */

 typedef struct {

    float angle[240];

    float d_angle[240];

    float d2_angle[240];

-   float inmem[240];

+   float inmem[ANALYSIS_BUF_SIZE];

+   int   mem_fill;                      /* number of usable samples in the buffer */

    float prev_band_tonality[NB_TBANDS];

    float prev_tonality;

    float E[NB_FRAMES][NB_TBANDS];

-   float lowE[NB_TBANDS], highE[NB_TBANDS];

+   float lowE[NB_TBANDS];

+   float highE[NB_TBANDS];

    float meanE[NB_TOT_BANDS];

    float mem[32];

    float cmean[8];

@@ -56,6 +58,6 @@

 } TonalityAnalysisState;

 void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,

-     CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth);

+     CELTEncoder *celt_enc, const opus_val16 *x, int len, int C, int lsb_depth);

 #endif

--- a/src/opus_encoder.c

+++ b/src/opus_encoder.c

@@ -592,18 +592,16 @@

     lsb_depth = IMIN(lsb_depth, st->lsb_depth);

 #ifndef FIXED_POINT

-    /* Only perform analysis for 10- and 20-ms frames. We don't have enough buffering for shorter

-       ones and longer ones will be split if they're in CELT-only mode. */

-    perform_analysis = st->silk_mode.complexity >= 7

-                       && (frame_size >= st->Fs/100 || frame_size >= st->Fs/50)

-                       && st->Fs==48000;

+    /* Only perform analysis for frames of up to 20 ms. Longer ones will be split

+       if they're in CELT-only mode. */

+    perform_analysis = st->silk_mode.complexity >= 7 && frame_size <= st->Fs/50 && st->Fs==48000;

     if (perform_analysis)

-       int nb_analysis_frames;

-       nb_analysis_frames = frame_size/(st->Fs/100);

-       for (i=0;i<nb_analysis_frames;i++)

-          tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm+i*(st->Fs/100)*st->channels, st->channels, lsb_depth);

-       if (st->signal_type == OPUS_AUTO)

+       analysis_info.valid = 0;

+       tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm, IMIN(480, frame_size), st->channels, lsb_depth);

+       if (frame_size > st->Fs/100)

+          tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm+(st->Fs/100)*st->channels, 480, st->channels, lsb_depth);

+       if (analysis_info.valid && st->signal_type == OPUS_AUTO)

           st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));

        st->detected_bandwidth = analysis_info.opus_bandwidth;

     } else {