ref: 7509fdb813e768da61b1a7db3df759d3e6efb1d1
parent: 56921ff73f55c23f1d3e8941b4098ab862e317e1
author: Jean-Marc Valin <[email protected]>
date: Thu Dec 20 17:48:35 EST 2012
New bandwidth detection code. Can now work up to full-band and uses lsb_depth to fix the noise issue.
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -57,6 +57,8 @@
opus_val16 noisiness;
opus_val16 activity;
opus_val16 music_prob;
+ int bandwidth;
+ int opus_bandwidth;
}AnalysisInfo;
#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -100,6 +100,10 @@
2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
};
+static const int extra_bands[NB_TOT_BANDS+1] = { /* Band edges for the bandwidth detector: band b spans spectrum bins extra_bands[b]..extra_bands[b+1] (the caller adds a fixed offset of 3 bins), so NB_TOT_BANDS+1 edges delimit NB_TOT_BANDS bands */
+ 0, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200
+};
+
/*static const float tweight[NB_TBANDS+1] = {
.3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
};*/
@@ -135,7 +139,7 @@
}
}
-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C)
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth)
{
int i, b;
const CELTMode *mode;
@@ -153,7 +157,7 @@
float features[100];
float frame_tonality;
float max_frame_tonality;
- float tw_sum=0;
+ /*float tw_sum=0;*/
float frame_noisiness;
const float pi4 = M_PI*M_PI*M_PI*M_PI;
float slope=0;
@@ -164,7 +168,8 @@
float frame_loudness;
float bandwidth_mask;
int bandwidth=0;
- float bandE[NB_TBANDS];
+ float maxE = 0;
+ float noise_floor;
celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode));
tonal->last_transition++;
@@ -236,7 +241,7 @@
frame_tonality = 0;
max_frame_tonality = 0;
- tw_sum = 0;
+ /*tw_sum = 0;*/
info->activity = 0;
frame_noisiness = 0;
frame_stationarity = 0;
@@ -264,19 +269,10 @@
tE += binE*tonality[i];
nE += binE*2*(.5-noisiness[i]);
}
- bandE[b] = E;
tonal->E[tonal->E_count][b] = E;
frame_noisiness += nE/(1e-15+E);
frame_loudness += sqrt(E+1e-10);
- /* Add a reasonable noise floor */
- tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E;
- tonal->meanRE[b] = (1-alphaE2)*tonal->meanRE[b] + alphaE2*sqrt(E);
- /* 13 dB slope for spreading function */
- bandwidth_mask = MAX32(.05*bandwidth_mask, E);
- /* Checks if band looks like stationary noise or if it's below a (trivial) masking curve */
- if (tonal->meanRE[b]*tonal->meanRE[b] < tonal->meanE[b]*.95 && E>.1*bandwidth_mask)
- bandwidth = b;
logE[b] = log(E+1e-10);
tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01);
tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1);
@@ -317,6 +313,42 @@
tonal->prev_band_tonality[b] = band_tonality[b];
}
+ bandwidth_mask = 0;
+ bandwidth = 0;
+ for (b=0;b<NB_TOT_BANDS;b++)
+ maxE = MAX32(maxE, tonal->meanE[b]);
+ noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
+ noise_floor *= noise_floor;
+ for (b=0;b<NB_TOT_BANDS;b++)
+ {
+ float E=0;
+ int band_start, band_end;
+ /* Keep a margin of 300 Hz for aliasing */
+ band_start = extra_bands[b]+3;
+ band_end = extra_bands[b+1]+3;
+ for (i=band_start;i<band_end;i++)
+ {
+ float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
+ + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+ E += binE;
+ }
+ E /= (band_end-band_start);
+ maxE = MAX32(maxE, E);
+ if (tonal->count>2)
+ {
+ tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E;
+ } else {
+ tonal->meanE[b] = E;
+ }
+ E = MAX32(E, tonal->meanE[b]);
+ /* 13 dB slope for spreading function */
+ bandwidth_mask = MAX32(.05*bandwidth_mask, E);
+ /* Band counts as active only if it is within 10 dB of the (trivial) masking curve, within 100 dB of the running peak energy (maxE), and above the lsb_depth-derived noise floor */
+ if (E>.1*bandwidth_mask && E*1e10f > maxE && E > noise_floor)
+ bandwidth = b;
+ }
+ if (tonal->count<=2)
+ bandwidth = 20;
frame_loudness = 20*log10(frame_loudness);
tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness);
tonal->lowECount *= (1-alphaE);
@@ -417,21 +449,20 @@
printf("%f ", features[i]);
printf("\n");*/
- /* FIXME: Can't detect SWB for now because the last band ends at 12 kHz */
- if (bandwidth == NB_TBANDS-1 || tonal->count<100)
- {
+ if (bandwidth<=12 || (bandwidth==13 && tonal->opus_bandwidth == OPUS_BANDWIDTH_NARROWBAND))
+ tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+ else if (bandwidth<=14 || (bandwidth==15 && tonal->opus_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND))
+ tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+ else if (bandwidth<=16 || (bandwidth==17 && tonal->opus_bandwidth == OPUS_BANDWIDTH_WIDEBAND))
+ tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+ else if (bandwidth<=18)
+ tonal->opus_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
+ else
tonal->opus_bandwidth = OPUS_BANDWIDTH_FULLBAND;
- } else {
- int close_enough = 0;
- if (bandE[bandwidth-1] < 3000*bandE[NB_TBANDS-1] && bandwidth < NB_TBANDS-1)
- close_enough=1;
- if (bandwidth<=11 || (bandwidth==12 && close_enough))
- tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
- else if (bandwidth<=13)
- tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
- else if (bandwidth<=15 || (bandwidth==16 && close_enough))
- tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
- }
+
+ info->bandwidth = bandwidth;
+ info->opus_bandwidth = tonal->opus_bandwidth;
+ /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
info->noisiness = frame_noisiness;
info->valid = 1;
}
--- a/src/analysis.h
+++ b/src/analysis.h
@@ -30,8 +30,8 @@
#define NB_FRAMES 8
#define NB_TBANDS 18
+#define NB_TOT_BANDS 21
-
typedef struct {
float angle[240];
float d_angle[240];
@@ -40,7 +40,7 @@
float prev_tonality;
float E[NB_FRAMES][NB_TBANDS];
float lowE[NB_TBANDS], highE[NB_TBANDS];
- float meanE[NB_TBANDS], meanRE[NB_TBANDS];
+ float meanE[NB_TOT_BANDS];
float mem[32];
float cmean[8];
float std[9];
@@ -55,6 +55,6 @@
} TonalityAnalysisState;
void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
- CELTEncoder *celt_enc, const opus_val16 *x, int C);
+ CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth);
#endif
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -69,6 +69,7 @@
int vbr_constraint;
opus_int32 bitrate_bps;
opus_int32 user_bitrate_bps;
+ int lsb_depth;
int encoder_buffer;
#define OPUS_ENCODER_RESET_START stream_channels
@@ -210,6 +211,7 @@
st->user_forced_mode = OPUS_AUTO;
st->voice_ratio = -1;
st->encoder_buffer = st->Fs/100;
+ st->lsb_depth = 24;
/* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead
+ 1.5 ms for SILK resamplers and stereo prediction) */
@@ -859,6 +861,13 @@
st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
if (st->Fs <= 8000 && st->bandwidth > OPUS_BANDWIDTH_NARROWBAND)
st->bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+#ifndef FIXED_POINT
+ if (analysis_info.valid)
+ {
+ st->bandwidth = IMIN(st->bandwidth, analysis_info.opus_bandwidth);
+ }
+#endif
+ celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(st->lsb_depth));
/* If max_data_bytes represents less than 8 kb/s, switch to CELT-only mode */
if (max_data_bytes < (frame_rate > 50 ? 12000 : 8000)*frame_size / (st->Fs * 8))
@@ -976,7 +985,7 @@
int nb_analysis_frames;
nb_analysis_frames = frame_size/(st->Fs/100);
for (i=0;i<nb_analysis_frames;i++)
- tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm_buf+i*(st->Fs/100)*st->channels, st->channels);
+ tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm_buf+i*(st->Fs/100)*st->channels, st->channels, st->lsb_depth);
if (st->signal_type == OPUS_AUTO)
st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));
} else {
@@ -1700,13 +1709,15 @@
case OPUS_SET_LSB_DEPTH_REQUEST:
{
opus_int32 value = va_arg(ap, opus_int32);
- ret = celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(value));
+ if (value<8 || value>24)
+ goto bad_arg;
+ st->lsb_depth=value;
}
break;
case OPUS_GET_LSB_DEPTH_REQUEST:
{
opus_int32 *value = va_arg(ap, opus_int32*);
- celt_encoder_ctl(celt_enc, OPUS_GET_LSB_DEPTH(value));
+ *value = st->lsb_depth;
}
break;
case OPUS_RESET_STATE: