shithub: opus

--- a/celt/celt.h

+++ b/celt/celt.h

@@ -57,6 +57,8 @@

    opus_val16 noisiness;

    opus_val16 activity;

    opus_val16 music_prob;

+   int        bandwidth;

+   int        opus_bandwidth;

 }AnalysisInfo;

 #define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))

--- a/src/analysis.c

+++ b/src/analysis.c

@@ -100,6 +100,10 @@

        2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120

};

+static const int extra_bands[NB_TOT_BANDS+1] = {

+      0, 2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200

+};

 /*static const float tweight[NB_TBANDS+1] = {

       .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5

 };*/

@@ -135,7 +139,7 @@

-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C)

+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth)

     int i, b;

     const CELTMode *mode;

@@ -153,7 +157,7 @@

     float features[100];

     float frame_tonality;

     float max_frame_tonality;

-    float tw_sum=0;

+    /*float tw_sum=0;*/

     float frame_noisiness;

     const float pi4 = M_PI*M_PI*M_PI*M_PI;

     float slope=0;

@@ -164,7 +168,8 @@

     float frame_loudness;

     float bandwidth_mask;

     int bandwidth=0;

-    float bandE[NB_TBANDS];

+    float maxE = 0;

+    float noise_floor;

     celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode));

     tonal->last_transition++;

@@ -236,7 +241,7 @@

     frame_tonality = 0;

     max_frame_tonality = 0;

-    tw_sum = 0;

+    /*tw_sum = 0;*/

     info->activity = 0;

     frame_noisiness = 0;

     frame_stationarity = 0;

@@ -264,19 +269,10 @@

           tE += binE*tonality[i];

           nE += binE*2*(.5-noisiness[i]);

-       bandE[b] = E;

        tonal->E[tonal->E_count][b] = E;

        frame_noisiness += nE/(1e-15+E);

        frame_loudness += sqrt(E+1e-10);

-       /* Add a reasonable noise floor */

-       tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E;

-       tonal->meanRE[b] = (1-alphaE2)*tonal->meanRE[b] + alphaE2*sqrt(E);

-       /* 13 dB slope for spreading function */

-       bandwidth_mask = MAX32(.05*bandwidth_mask, E);

-       /* Checks if band looks like stationary noise or if it's below a (trivial) masking curve */

-       if (tonal->meanRE[b]*tonal->meanRE[b] < tonal->meanE[b]*.95 && E>.1*bandwidth_mask)

-          bandwidth = b;

        logE[b] = log(E+1e-10);

        tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01);

        tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1);

@@ -317,6 +313,42 @@

        tonal->prev_band_tonality[b] = band_tonality[b];

+    bandwidth_mask = 0;

+    bandwidth = 0;

+    for (b=0;b<NB_TOT_BANDS;b++)

+       maxE = MAX32(maxE, tonal->meanE[b]);

+    noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));

+    noise_floor *= noise_floor;

+    for (b=0;b<NB_TOT_BANDS;b++)

+    {

+       float E=0;

+       int band_start, band_end;

+       /* Keep a margin of 300 Hz for aliasing */

+       band_start = extra_bands[b]+3;

+       band_end = extra_bands[b+1]+3;

+       for (i=band_start;i<band_end;i++)

+       {

+          float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r

+                     + out[i].i*out[i].i + out[N-i].i*out[N-i].i;

+          E += binE;

+       }

+       E /= (band_end-band_start);

+       maxE = MAX32(maxE, E);

+       if (tonal->count>2)

+       {

+          tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E;

+       } else {

+          tonal->meanE[b] = E;

+       }

+       E = MAX32(E, tonal->meanE[b]);

+       /* 13 dB slope for spreading function */

+       bandwidth_mask = MAX32(.05*bandwidth_mask, E);

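+       /* Band counts toward the bandwidth only if it is above a (trivial) masking curve, is not negligible relative to the running maximum band energy (maxE), and exceeds the noise floor derived from lsb_depth */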

+       if (E>.1*bandwidth_mask && E*1e10f > maxE && E > noise_floor)

+          bandwidth = b;

+    }

+    if (tonal->count<=2)

+       bandwidth = 20;

     frame_loudness = 20*log10(frame_loudness);

     tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness);

     tonal->lowECount *= (1-alphaE);

@@ -417,21 +449,20 @@

        printf("%f ", features[i]);

     printf("\n");*/

-    /* FIXME: Can't detect SWB for now because the last band ends at 12 kHz */

-    if (bandwidth == NB_TBANDS-1 || tonal->count<100)

-    {

+    if (bandwidth<=12 || (bandwidth==13 && tonal->opus_bandwidth == OPUS_BANDWIDTH_NARROWBAND))

+       tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND;

+    else if (bandwidth<=14 || (bandwidth==15 && tonal->opus_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND))

+       tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;

+    else if (bandwidth<=16 || (bandwidth==17 && tonal->opus_bandwidth == OPUS_BANDWIDTH_WIDEBAND))

+       tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;

+    else if (bandwidth<=18)

+       tonal->opus_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;

+    else

        tonal->opus_bandwidth = OPUS_BANDWIDTH_FULLBAND;

-    } else {

-       int close_enough = 0;

-       if (bandE[bandwidth-1] < 3000*bandE[NB_TBANDS-1] && bandwidth < NB_TBANDS-1)

-          close_enough=1;

-       if (bandwidth<=11 || (bandwidth==12 && close_enough))

-          tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND;

-       else if (bandwidth<=13)

-          tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;

-       else if (bandwidth<=15 || (bandwidth==16 && close_enough))

-          tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;

-    }

+    info->bandwidth = bandwidth;

+    info->opus_bandwidth = tonal->opus_bandwidth;

+    /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/

     info->noisiness = frame_noisiness;

     info->valid = 1;

--- a/src/analysis.h

+++ b/src/analysis.h

@@ -30,8 +30,8 @@

 #define NB_FRAMES 8

 #define NB_TBANDS 18

+#define NB_TOT_BANDS 21

 typedef struct {

    float angle[240];

    float d_angle[240];

@@ -40,7 +40,7 @@

    float prev_tonality;

    float E[NB_FRAMES][NB_TBANDS];

    float lowE[NB_TBANDS], highE[NB_TBANDS];

-   float meanE[NB_TBANDS], meanRE[NB_TBANDS];

+   float meanE[NB_TOT_BANDS];

    float mem[32];

    float cmean[8];

    float std[9];

@@ -55,6 +55,6 @@

 } TonalityAnalysisState;

 void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,

-     CELTEncoder *celt_enc, const opus_val16 *x, int C);

+     CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth);

 #endif

--- a/src/opus_encoder.c

+++ b/src/opus_encoder.c

@@ -69,6 +69,7 @@

     int          vbr_constraint;

     opus_int32   bitrate_bps;

     opus_int32   user_bitrate_bps;

+    int          lsb_depth;

     int          encoder_buffer;

 #define OPUS_ENCODER_RESET_START stream_channels

@@ -210,6 +211,7 @@

     st->user_forced_mode = OPUS_AUTO;

     st->voice_ratio = -1;

     st->encoder_buffer = st->Fs/100;

+    st->lsb_depth = 24;

     /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead

        + 1.5 ms for SILK resamplers and stereo prediction) */

@@ -859,6 +861,13 @@

         st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;

     if (st->Fs <= 8000 && st->bandwidth > OPUS_BANDWIDTH_NARROWBAND)

         st->bandwidth = OPUS_BANDWIDTH_NARROWBAND;

+#ifndef FIXED_POINT

+    if (analysis_info.valid)

+    {

+       st->bandwidth = IMIN(st->bandwidth, analysis_info.opus_bandwidth);

+    }

+#endif

+    celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(st->lsb_depth));

     /* If max_data_bytes represents less than 8 kb/s, switch to CELT-only mode */

     if (max_data_bytes < (frame_rate > 50 ? 12000 : 8000)*frame_size / (st->Fs * 8))

@@ -976,7 +985,7 @@

        int nb_analysis_frames;

        nb_analysis_frames = frame_size/(st->Fs/100);

        for (i=0;i<nb_analysis_frames;i++)

-          tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm_buf+i*(st->Fs/100)*st->channels, st->channels);

+          tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm_buf+i*(st->Fs/100)*st->channels, st->channels, st->lsb_depth);

        if (st->signal_type == OPUS_AUTO)

           st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));

     } else {

@@ -1700,13 +1709,15 @@

         case OPUS_SET_LSB_DEPTH_REQUEST:

             opus_int32 value = va_arg(ap, opus_int32);

-            ret = celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(value));

+            if (value<8 || value>24)

+               goto bad_arg;

+            st->lsb_depth=value;

         break;

         case OPUS_GET_LSB_DEPTH_REQUEST:

             opus_int32 *value = va_arg(ap, opus_int32*);

-            celt_encoder_ctl(celt_enc, OPUS_GET_LSB_DEPTH(value));

+            *value = st->lsb_depth;

         break;

         case OPUS_RESET_STATE: