ref: d68e809bf8310e215c47bc334edb7d9207ed1bc5
parent: 7e0ca4337f0d740e90cd20c913bf2c79d351d046
author: Jean-Marc Valin <[email protected]>
date: Thu Jan 7 10:49:36 EST 2016
Quality: Retrained classifier MLP with better data. New MLP doesn't attempt to classify silence as speech/music.
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -202,6 +202,11 @@
info_out->music_prob = psum;
}
+static const float std_feature_bias[9] = {
+ 5.684947, 3.475288, 1.770634, 1.599784, 3.773215,
+ 2.163313, 1.260756, 1.116868, 1.918795
+};
+
static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
{
int i, b;
@@ -507,24 +512,25 @@
tonal->mem[i] = BFCC[i];
}
for (i=0;i<9;i++)
- features[11+i] = (float)sqrt(tonal->std[i]);
- features[20] = info->tonality;
- features[21] = info->activity;
- features[22] = frame_stationarity;
- features[23] = info->tonality_slope;
- features[24] = tonal->lowECount;
+ features[11+i] = (float)sqrt(tonal->std[i]) - std_feature_bias[i];
+ features[20] = info->tonality - 0.154723;
+ features[21] = info->activity - 0.724643;
+ features[22] = frame_stationarity - 0.743717;
+ features[23] = info->tonality_slope + 0.069216;
+ features[24] = tonal->lowECount - 0.067930;
#ifndef DISABLE_FLOAT_API
mlp_process(&net, features, frame_probs);
frame_probs[0] = .5f*(frame_probs[0]+1);
/* Curve fitting between the MLP probability and the actual probability */
- frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);
+ /*frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);*/
/* Probability of active audio (as opposed to silence) */
frame_probs[1] = .5f*frame_probs[1]+.5f;
+ frame_probs[1] *= frame_probs[1];
/* Consider that silence has a 50-50 probability. */
frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f;
- /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/
+ /*printf("%f %f\n", frame_probs[0], frame_probs[1]);*/
{
/* Probability of state transition */
float tau;
--- a/src/mlp_data.c
+++ b/src/mlp_data.c
@@ -1,6 +1,3 @@
-/* The contents of this file was automatically generated by mlp_train.c
- It contains multi-layer perceptron (MLP) weights. */
-
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
@@ -7,103 +4,109 @@
#include "mlp.h"
-/* RMS error was 0.138320, seed was 1361535663 */
+/* RMS error was 0.230027, seed was 1452289367 */
+/* 0.009100 0.069938 (0.230027 0.230027) 1.24058e-07 5543 */
-static const float weights[422] = {
+static const float weights[450] = {
/* hidden layer */
--0.0941125f, -0.302976f, -0.603555f, -0.19393f, -0.185983f,
--0.601617f, -0.0465317f, -0.114563f, -0.103599f, -0.618938f,
--0.317859f, -0.169949f, -0.0702885f, 0.148065f, 0.409524f,
-0.548432f, 0.367649f, -0.494393f, 0.764306f, -1.83957f,
-0.170849f, 12.786f, -1.08848f, -1.27284f, -16.2606f,
-24.1773f, -5.57454f, -0.17276f, -0.163388f, -0.224421f,
--0.0948944f, -0.0728695f, -0.26557f, -0.100283f, -0.0515459f,
--0.146142f, -0.120674f, -0.180655f, 0.12857f, 0.442138f,
--0.493735f, 0.167767f, 0.206699f, -0.197567f, 0.417999f,
-1.50364f, -0.773341f, -10.0401f, 0.401872f, 2.97966f,
-15.2165f, -1.88905f, -1.19254f, 0.0285397f, -0.00405139f,
-0.0707565f, 0.00825699f, -0.0927269f, -0.010393f, -0.00428882f,
--0.00489743f, -0.0709731f, -0.00255992f, 0.0395619f, 0.226424f,
-0.0325231f, 0.162175f, -0.100118f, 0.485789f, 0.12697f,
-0.285937f, 0.0155637f, 0.10546f, 3.05558f, 1.15059f,
--1.00904f, -1.83088f, 3.31766f, -3.42516f, -0.119135f,
--0.0405654f, 0.00690068f, 0.0179877f, -0.0382487f, 0.00597941f,
--0.0183611f, 0.00190395f, -0.144322f, -0.0435671f, 0.000990594f,
-0.221087f, 0.142405f, 0.484066f, 0.404395f, 0.511955f,
--0.237255f, 0.241742f, 0.35045f, -0.699428f, 10.3993f,
-2.6507f, -2.43459f, -4.18838f, 1.05928f, 1.71067f,
-0.00667811f, -0.0721335f, -0.0397346f, 0.0362704f, -0.11496f,
--0.0235776f, 0.0082161f, -0.0141741f, -0.0329699f, -0.0354253f,
-0.00277404f, -0.290654f, -1.14767f, -0.319157f, -0.686544f,
-0.36897f, 0.478899f, 0.182579f, -0.411069f, 0.881104f,
--4.60683f, 1.4697f, 0.335845f, -1.81905f, -30.1699f,
-5.55225f, 0.0019508f, -0.123576f, -0.0727332f, -0.0641597f,
--0.0534458f, -0.108166f, -0.0937368f, -0.0697883f, -0.0275475f,
--0.192309f, -0.110074f, 0.285375f, -0.405597f, 0.0926724f,
--0.287881f, -0.851193f, -0.099493f, -0.233764f, -1.2852f,
-1.13611f, 3.12168f, -0.0699f, -1.86216f, 2.65292f,
--7.31036f, 2.44776f, -0.00111802f, -0.0632786f, -0.0376296f,
--0.149851f, 0.142963f, 0.184368f, 0.123433f, 0.0756158f,
-0.117312f, 0.0933395f, 0.0692163f, 0.0842592f, 0.0704683f,
-0.0589963f, 0.0942205f, -0.448862f, 0.0262677f, 0.270352f,
--0.262317f, 0.172586f, 2.00227f, -0.159216f, 0.038422f,
-10.2073f, 4.15536f, -2.3407f, -0.0550265f, 0.00964792f,
--0.141336f, 0.0274501f, 0.0343921f, -0.0487428f, 0.0950172f,
--0.00775017f, -0.0372492f, -0.00548121f, -0.0663695f, 0.0960506f,
--0.200008f, -0.0412827f, 0.58728f, 0.0515787f, 0.337254f,
-0.855024f, 0.668371f, -0.114904f, -3.62962f, -0.467477f,
--0.215472f, 2.61537f, 0.406117f, -1.36373f, 0.0425394f,
-0.12208f, 0.0934502f, 0.123055f, 0.0340935f, -0.142466f,
-0.035037f, -0.0490666f, 0.0733208f, 0.0576672f, 0.123984f,
--0.0517194f, -0.253018f, 0.590565f, 0.145849f, 0.315185f,
-0.221534f, -0.149081f, 0.216161f, -0.349575f, 24.5664f,
--0.994196f, 0.614289f, -18.7905f, -2.83277f, -0.716801f,
--0.347201f, 0.479515f, -0.246027f, 0.0758683f, 0.137293f,
--0.17781f, 0.118751f, -0.00108329f, -0.237334f, 0.355732f,
--0.12991f, -0.0547627f, -0.318576f, -0.325524f, 0.180494f,
--0.0625604f, 0.141219f, 0.344064f, 0.37658f, -0.591772f,
-5.8427f, -0.38075f, 0.221894f, -1.41934f, -1.87943e+06f,
-1.34114f, 0.0283355f, -0.0447856f, -0.0211466f, -0.0256927f,
-0.0139618f, 0.0207934f, -0.0107666f, 0.0110969f, 0.0586069f,
--0.0253545f, -0.0328433f, 0.11872f, -0.216943f, 0.145748f,
-0.119808f, -0.0915211f, -0.120647f, -0.0787719f, -0.143644f,
--0.595116f, -1.152f, -1.25335f, -1.17092f, 4.34023f,
--975268.f, -1.37033f, -0.0401123f, 0.210602f, -0.136656f,
-0.135962f, -0.0523293f, 0.0444604f, 0.0143928f, 0.00412666f,
--0.0193003f, 0.218452f, -0.110204f, -2.02563f, 0.918238f,
--2.45362f, 1.19542f, -0.061362f, -1.92243f, 0.308111f,
-0.49764f, 0.912356f, 0.209272f, -2.34525f, 2.19326f,
--6.47121f, 1.69771f, -0.725123f, 0.0118929f, 0.0377944f,
-0.0554003f, 0.0226452f, -0.0704421f, -0.0300309f, 0.0122978f,
--0.0041782f, -0.0686612f, 0.0313115f, 0.039111f, 0.364111f,
--0.0945548f, 0.0229876f, -0.17414f, 0.329795f, 0.114714f,
-0.30022f, 0.106997f, 0.132355f, 5.79932f, 0.908058f,
--0.905324f, -3.3561f, 0.190647f, 0.184211f, -0.673648f,
-0.231807f, -0.0586222f, 0.230752f, -0.438277f, 0.245857f,
--0.17215f, 0.0876383f, -0.720512f, 0.162515f, 0.0170571f,
-0.101781f, 0.388477f, 1.32931f, 1.08548f, -0.936301f,
--2.36958f, -6.71988f, -3.44376f, 2.13818f, 14.2318f,
-4.91459f, -3.09052f, -9.69191f, -0.768234f, 1.79604f,
-0.0549653f, 0.163399f, 0.0797025f, 0.0343933f, -0.0555876f,
--0.00505673f, 0.0187258f, 0.0326628f, 0.0231486f, 0.15573f,
-0.0476223f, -0.254824f, 1.60155f, -0.801221f, 2.55496f,
-0.737629f, -1.36249f, -0.695463f, -2.44301f, -1.73188f,
-3.95279f, 1.89068f, 0.486087f, -11.3343f, 3.9416e+06f,
-
+-1.20927f, -0.0275523f, 0.0304442f, -0.071791f, -0.0897356f,
+0.100996f, -0.0492634f, 0.070213f, 0.0187071f, 0.0042668f,
+0.0644589f, -0.10967f, -0.119688f, -0.00888386f, 0.170952f,
+0.174562f, -0.265435f, -0.0635892f, -0.284755f, -1.06453f,
+0.202855f, 2.31084f, -2.763f, -0.420894f, 0.698811f,
+6.46418f, 0.0662341f, 0.0758173f, 0.0511722f, 0.0426484f,
+0.115711f, -0.263815f, -0.0113386f, -0.189737f, -0.0929912f,
+-0.287827f, 0.0925463f, 0.0286792f, -0.0199793f, -0.193071f,
+0.258586f, 0.018504f, 0.116125f, 0.099269f, -0.00781962f,
+-0.266017f, 0.283733f, 10.5488f, -0.658286f, 0.836758f,
+13.1168f, -5.02553f, -1.0969f, -0.0738116f, 0.0204736f,
+0.0110775f, -0.00198985f, 0.00426824f, 0.148998f, 0.0755275f,
+0.112213f, -0.0518501f, 0.028398f, 0.0240943f, -0.0503666f,
+-0.149506f, -0.133575f, -0.137328f, 0.116275f, 0.238077f,
+0.080265f, 0.0387349f, 0.09185f, 4.04867f, 3.2435f,
+-0.7155f, 8.14792f, -29.8969f, 1.1575f, -0.124794f,
+0.0226943f, -0.0470538f, -0.0334476f, 0.0360859f, 0.0447789f,
+-0.00258532f, -0.0192054f, -0.113082f, 0.109513f, -0.0437787f,
+0.0382349f, -0.00994462f, -0.155653f, 0.171922f, -0.222151f,
+-0.523565f, -0.0454432f, -0.556888f, 0.761537f, -2.70075f,
+-0.883015f, 0.887168f, 0.746329f, -0.363477f, 0.360424f,
+0.034755f, -0.015404f, 0.00688472f, -0.00949269f, 0.0625642f,
+-0.050711f, 0.0370223f, 0.0149561f, 0.060385f, -0.0709806f,
+-0.036509f, 0.099007f, -0.0397276f, 0.285237f, 0.127836f,
+-0.15154f, 0.265848f, -0.0832318f, 0.0520659f, 0.897805f,
+0.439215f, -3.00803f, 1.93755f, -0.408725f, 0.300142f,
+-1.42001f, 0.118794f, -0.04621f, 0.050757f, -0.0239654f,
+-0.0629488f, -0.0083243f, -0.108989f, -0.0326831f, 0.104277f,
+-0.0667274f, 0.0475941f, 0.069182f, -0.0574944f, -0.137823f,
+-0.206978f, -0.162035f, -0.208444f, 0.141751f, -0.289377f,
+-0.7875f, 0.0911f, 0.174999f, -2.03406f, 3.06743f,
+1.22255f, 2.10659f, 0.0779022f, -0.220946f, 0.137124f,
+-0.0625512f, -0.073468f, 0.174861f, -0.139417f, 0.0967417f,
+0.0830658f, -0.223662f, 0.103016f, -0.102317f, 0.225611f,
+0.154375f, 0.187856f, -0.00878193f, 0.128648f, -0.371477f,
+-0.479037f, 0.156541f, 1.10304f, -1.26162f, 0.086939f,
+-0.143269f, 2.18318f, -2.88831f, 0.101126f, -0.308315f,
+0.222068f, -0.227709f, -0.00855236f, 0.0107035f, 0.00774349f,
+-0.0185316f, 0.0306039f, -0.233612f, 0.0807309f, -0.029933f,
+0.151942f, -0.267724f, 0.0484763f, 0.132192f, -0.230059f,
+0.357879f, 0.075414f, 0.110637f, -1.27818f, 3.3101f,
+0.831064f, -0.212367f, -20.704f, -1.1492f, 0.0312941f,
+-0.0208507f, -0.00804196f, 0.0110407f, 0.027599f, 0.00193594f,
+-0.0135057f, -0.00614977f, 0.0505432f, -0.0108098f, 0.000826042f,
+-0.0243765f, -0.323055f, 0.0682748f, -0.55873f, -0.103042f,
+0.174935f, -0.126558f, -0.104518f, 0.422479f, -0.0683178f,
+-1.44811f, 0.702109f, 0.712138f, -0.420112f, 2.59746f,
+-0.0297689f, -0.0453044f, -0.0330312f, -0.0344518f, -0.0260442f,
+-0.0610515f, 0.0916816f, 0.0256295f, -0.105187f, 0.0771212f,
+-0.0898792f, -0.186163f, -0.321019f, -0.225689f, 0.175825f,
+0.252939f, 0.738898f, 2.41919f, 0.114505f, -0.314026f,
+0.607983f, 1.73201f, -2.09609f, -0.609339f, 1.18997f,
+0.113871f, -0.177673f, -0.0785783f, -0.348033f, -0.0949274f,
+-0.0191062f, 0.335823f, -0.0578655f, 0.131259f, -0.118687f,
+-0.132123f, -0.239624f, 0.000738732f, -0.185936f, -0.13077f,
+-0.436439f, -0.141664f, 0.0353391f, -0.0536557f, -0.0964537f,
+0.221853f, 1.94264f, -1.78544f, 3.8254f, 3.74598f,
+2.37071f, -1.42709f, 0.0463179f, -0.0568602f, 0.0529534f,
+-0.103245f, -0.340972f, 0.101934f, -0.810811f, 0.176158f,
+0.469658f, 0.0248864f, -0.10734f, -0.143827f, -0.0457131f,
+0.779219f, -0.142152f, 0.0394297f, 0.160772f, -0.707623f,
+-0.608236f, 1.07106f, -1.27037f, 2.27722f, 6.3688f,
+0.519837f, -3.33262f, -0.126443f, -0.0943922f, 0.0265837f,
+0.0620709f, 0.0113266f, -0.255811f, -0.0735781f, -0.0638952f,
+-0.09543f, -0.204965f, 0.00454999f, 0.0554974f, -0.16251f,
+-0.573836f, 0.258764f, 0.19895f, 0.0219289f, -0.376757f,
+-0.508578f, -0.0767061f, -0.654512f, 4.48901f, 3.38949f,
+-2.34533f, -11.0766f, 4.35799f, 1.66794f, -0.0513934f,
+-0.0685787f, -0.0112154f, 0.000464661f, -0.234848f, -0.338596f,
+-0.142242f, -0.167476f, -0.140324f, -0.104829f, -0.104195f,
+0.0110351f, -0.112668f, 0.0872292f, -0.170777f, -0.0876985f,
+0.123348f, -0.156758f, 0.199038f, -0.056107f, 0.899269f,
+0.0820197f, -1.295f, 0.0295294f, 2.27577f, -0.940993f,
+-0.0100104f, -0.111541f, -0.132193f, -0.11037f, 0.0371375f,
+-0.0180172f, -0.0105591f, 0.0197043f, 0.04099f, -0.0538671f,
+-0.102347f, -0.0470742f, 0.178034f, -0.267772f, -0.105789f,
+-0.105376f, 0.0623262f, -0.042906f, 0.176528f, -0.160076f,
+-2.28483f, -1.92619f, 0.218149f, 9.67107f, 3.30399f,
+-1.75951f, 0.129671f, 0.118305f, 0.140766f, 0.0678099f,
+0.00313175f, -0.0144533f, -0.0310217f, -0.0245139f, 0.136948f,
+0.150137f, 0.112326f, -0.0755033f, -0.280984f, -0.249342f,
+-0.681657f, 0.0315246f, 0.294968f, 0.0407062f, 0.282759f,
+-0.344185f, -7.32828f, -0.220036f, -0.560418f, -1.87191f,
+-7.10132f,
/* output layer */
--0.381439f, 0.12115f, -0.906927f, 2.93878f, 1.6388f,
-0.882811f, 0.874344f, 1.21726f, -0.874545f, 0.321706f,
-0.785055f, 0.946558f, -0.575066f, -3.46553f, 0.884905f,
-0.0924047f, -9.90712f, 0.391338f, 0.160103f, -2.04954f,
-4.1455f, 0.0684029f, -0.144761f, -0.285282f, 0.379244f,
--1.1584f, -0.0277241f, -9.85f, -4.82386f, 3.71333f,
-3.87308f, 3.52558f};
+8.55144, 2.0822, 0.240592, 1.26638, 0.0309585,
+-1.09841, 0.861549, -1.53704, 1.07356, 4.39194,
+-2.60476, 0.375094, 0.122941, 0.00326393, 0.777163,
+-2.03171, -0.944556, 4.02958, -0.260741, 0.556385,
+-0.220568, -1.77121, -0.858706, -1.52023, -0.784162,
+0.345948, -0.0488489, -0.323381, -0.752573, 0.517346,
+0.876475, -1.44056, -0.382276, -1.55409, };
-static const int topo[3] = {25, 15, 2};
+static const int topo[3] = {25, 16, 2};
const MLP net = {
- 3,
- topo,
- weights
+ 3,
+ topo,
+ weights
};
--- a/src/mlp_train.c
+++ b/src/mlp_train.c
@@ -154,8 +154,10 @@
sum += W1[i*(hiddenDim+1)+j+1]*hidden[j];
netOut[i] = tansig_approx(sum);
error[i] = out[i] - netOut[i];
- rms += error[i]*error[i];
+ if (out[i] == 0) error[i] *= .0;
error_rate[i] += fabs(error[i])>1;
+ if (i==0) error[i] *= 3;
+ rms += error[i]*error[i];
/*error[i] = error[i]/(1+fabs(error[i]));*/
}
/* Back-propagate error */
@@ -449,7 +451,7 @@
outputs = malloc(nbOutputs*nbSamples*sizeof(*outputs));
seed = time(NULL);
- /*seed = 1361480659;*/
+ /*seed = 1452209040;*/
fprintf (stderr, "Seed is %u\n", seed);
srand(seed);
build_tansig_table();
--- a/src/opus_demo.c
+++ b/src/opus_demo.c
@@ -54,7 +54,8 @@
fprintf(stderr, "-d : only runs the decoder (reads the bit-stream as input)\n" );
fprintf(stderr, "-cbr : enable constant bitrate; default: variable bitrate\n" );
fprintf(stderr, "-cvbr : enable constrained variable bitrate; default: unconstrained\n" );
- fprintf(stderr, "-variable-duration : enable frames of variable duration (experts only); default: disabled\n" );
+ fprintf(stderr, "-variable-duration : enable frames of variable duration (experimental, experts only); default: disabled\n" );
+ fprintf(stderr, "-delayed-decision : use look-ahead for speech/music detection (experts only); default: disabled\n" );
fprintf(stderr, "-bandwidth <NB|MB|WB|SWB|FB> : audio bandwidth (from narrowband to fullband); default: sampling rate\n" );
fprintf(stderr, "-framesize <2.5|5|10|20|40|60> : frame size in ms; default: 20 \n" );
fprintf(stderr, "-max_payload <bytes> : maximum payload size in bytes, default: 1024\n" );