ref: 48ac122141c317964fae2987eaea161c46538717
parent: 7ebacf430a465d000d97d6d9015f8f6061af8804
author: Jean-Marc Valin <[email protected]>
date: Tue Nov 13 21:39:27 EST 2012
Makes analysis usable for all frame sizes
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -1274,7 +1274,7 @@
prefilter_tapset = st->tapset_decision;
pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);
- if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && st->analysis.tonality > .3
+ if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
&& (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
pitch_change = 1;
if (pf_on==0)
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -139,7 +139,7 @@
}
}
-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth)
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int len, int C, int lsb_depth)
{
int i, b;
const CELTMode *mode;
@@ -170,6 +170,8 @@
int bandwidth=0;
float maxE = 0;
float noise_floor;
+ int remaining;
+
celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode));
tonal->last_transition++;
@@ -180,28 +182,43 @@
if (tonal->count<4)
tonal->music_prob = .5;
kfft = mode->mdct.kfft[0];
+ if (tonal->count==0)
+ tonal->mem_fill = 240;
if (C==1)
{
- for (i=0;i<N2;i++)
- {
- float w = analysis_window[i];
- in[i].r = MULT16_16(w, tonal->inmem[i]);
- in[i].i = MULT16_16(w, x[i]);
- in[N-i-1].r = MULT16_16(w, x[N2-i-1]);
- in[N-i-1].i = MULT16_16(w, x[N-i-1]);
- tonal->inmem[i] = x[N2+i];
- }
+ for (i=0;i<IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill);i++)
+ tonal->inmem[i+tonal->mem_fill] = x[i];
} else {
- for (i=0;i<N2;i++)
- {
- float w = analysis_window[i];
- in[i].r = MULT16_16(w, tonal->inmem[i]);
- in[i].i = MULT16_16(w, x[2*i]+x[2*i+1]);
- in[N-i-1].r = MULT16_16(w, x[2*(N2-i-1)]+x[2*(N2-i-1)+1]);
- in[N-i-1].i = MULT16_16(w, x[2*(N-i-1)]+x[2*(N-i-1)+1]);
- tonal->inmem[i] = x[2*(N2+i)]+x[2*(N2+i)+1];
- }
+ for (i=0;i<IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill);i++)
+ tonal->inmem[i+tonal->mem_fill] = x[2*i]+x[2*i+1];
+ }
+ if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
+ {
+ tonal->mem_fill += len;
+ /* Not enough samples buffered yet to update the analysis */
+ return;
+ }
+
+ for (i=0;i<N2;i++)
+ {
+ float w = analysis_window[i];
+ in[i].r = MULT16_16(w, tonal->inmem[i]);
+ in[i].i = MULT16_16(w, tonal->inmem[N2+i]);
+ in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);
+ in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);
}
+ OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
+ remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
+ if (C==1)
+ {
+ for (i=0;i<remaining;i++)
+ tonal->inmem[240+i] = x[ANALYSIS_BUF_SIZE-tonal->mem_fill+i];
+ } else {
+ for (i=0;i<remaining;i++)
+ tonal->inmem[240+i] = x[2*(ANALYSIS_BUF_SIZE-tonal->mem_fill+i)]
+ + x[2*(ANALYSIS_BUF_SIZE-tonal->mem_fill+i)+1];
+ }
+ tonal->mem_fill = 240 + remaining;
opus_fft(kfft, in, out);
for (i=1;i<N2;i++)
--- a/src/analysis.h
+++ b/src/analysis.h
@@ -31,16 +31,18 @@
#define NB_FRAMES 8
#define NB_TBANDS 18
#define NB_TOT_BANDS 21
-
+#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */
typedef struct {
float angle[240];
float d_angle[240];
float d2_angle[240];
- float inmem[240];
+ float inmem[ANALYSIS_BUF_SIZE];
+ int mem_fill; /* number of usable samples in the buffer */
float prev_band_tonality[NB_TBANDS];
float prev_tonality;
float E[NB_FRAMES][NB_TBANDS];
- float lowE[NB_TBANDS], highE[NB_TBANDS];
+ float lowE[NB_TBANDS];
+ float highE[NB_TBANDS];
float meanE[NB_TOT_BANDS];
float mem[32];
float cmean[8];
@@ -56,6 +58,6 @@
} TonalityAnalysisState;
void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
- CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth);
+ CELTEncoder *celt_enc, const opus_val16 *x, int len, int C, int lsb_depth);
#endif
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -592,18 +592,16 @@
lsb_depth = IMIN(lsb_depth, st->lsb_depth);
#ifndef FIXED_POINT
- /* Only perform analysis for 10- and 20-ms frames. We don't have enough buffering for shorter
- ones and longer ones will be split if they're in CELT-only mode. */
- perform_analysis = st->silk_mode.complexity >= 7
- && (frame_size >= st->Fs/100 || frame_size >= st->Fs/50)
- && st->Fs==48000;
+ /* Only perform analysis for frames of up to 20 ms. Longer ones will be split if
+ they're in CELT-only mode. */
+ perform_analysis = st->silk_mode.complexity >= 7 && frame_size <= st->Fs/50 && st->Fs==48000;
if (perform_analysis)
{
- int nb_analysis_frames;
- nb_analysis_frames = frame_size/(st->Fs/100);
- for (i=0;i<nb_analysis_frames;i++)
- tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm+i*(st->Fs/100)*st->channels, st->channels, lsb_depth);
- if (st->signal_type == OPUS_AUTO)
+ analysis_info.valid = 0;
+ tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm, IMIN(480, frame_size), st->channels, lsb_depth);
+ if (frame_size > st->Fs/100)
+ tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm+(st->Fs/100)*st->channels, 480, st->channels, lsb_depth);
+ if (analysis_info.valid && st->signal_type == OPUS_AUTO)
st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));
st->detected_bandwidth = analysis_info.opus_bandwidth;
} else {