ref: e5f6a0b40630f923f680f0e3ed0ecf06d48f0399
parent: 45134c5d80f69a506f3619ce88fbafc1b2a54434
parent: bc4ba753e5c7e50f6149add3c46e47f54ba23f05
author: Paul Brossier <[email protected]>
date: Sun Sep 16 14:06:07 EDT 2007
merge from amaury's branch
--- a/examples/aubiomfcc.c
+++ b/examples/aubiomfcc.c
@@ -23,6 +23,9 @@
fvec_t * mfcc_out;
aubio_mfcc_t * mfcc;
+uint_t n_filters = 40;
+uint_t n_coefs = 11;
+
unsigned int pos = 0; /*frames%dspblocksize*/
uint_t usepitch = 0;
@@ -49,7 +52,13 @@
//compute mfccs
aubio_mfcc_do(mfcc, fftgrain, mfcc_out);
-
+
+ uint_t coef_cnt;
+ for (coef_cnt = 0; coef_cnt < n_coefs; coef_cnt++) {
+ outmsg("%f ",mfcc_out->data[0][coef_cnt]);
+ }
+ outmsg("\n");
+
/* end of block loop */
pos = -1; /* so it will be zero next j loop */
}
@@ -64,33 +73,34 @@
write extracted mfccs
*/
- uint_t filter_cnt;
+ uint_t coef_cnt;
if (output_filename == NULL) {
- if(frames >= 4) {
- outmsg("%f\t",(frames-4)*overlap_size/(float)samplerate);
- } else if (frames < 4) {
- outmsg("%f\t",0.);
- }
- outmsg("%f",mfcc_out->data[0][0]);
- for (filter_cnt = 1; filter_cnt < mfcc_out->length; filter_cnt++) {
- outmsg(",%f",mfcc_out->data[0][filter_cnt]);
- }
- outmsg("\n");
+// if(frames >= 4) {
+// outmsg("%f\t",(frames-4)*overlap_size/(float)samplerate);
+// }
+// else if (frames < 4) {
+// outmsg("%f\t",0.);
+// }
+ //outmsg("%f ",mfcc_out->data[0][0]);
+
+
}
}
int main(int argc, char **argv) {
// params
- uint_t n_filters = 11;
+
examples_common_init(argc,argv);
- smpl_t lowfreq = 0.;
- smpl_t highfreq = samplerate;
- mfcc_out = new_fvec(n_filters,channels);
+ smpl_t lowfreq = 133.333f;
+ smpl_t highfreq = 44100.f;
+ mfcc_out = new_fvec(n_coefs,channels);
+
//populating the filter
- mfcc = new_aubio_mfcc(buffer_size, samplerate, n_filters, lowfreq, highfreq,
- channels);
-
+ mfcc = new_aubio_mfcc(buffer_size, samplerate, n_filters, n_coefs , lowfreq, highfreq, channels);
+ dump_filterbank(mfcc);
+
+
//process
examples_common_process(aubio_process,process_print);
--- /dev/null
+++ b/examples/plotfb.py
@@ -1,0 +1,30 @@
+#!/usr/bin/env python
+
+import pylab
+import numpy
+import sys
+
+
+filename=sys.argv[1]
+
+doLog=False
+if len(sys.argv)>2:
+ if sys.argv[2]=='log':
+ doLog=True
+
+mat = pylab.load(filename)
+nmat= numpy.array(mat)
+print numpy.shape(nmat)
+
+pylab.hold(True)
+
+n_filters=numpy.shape(nmat)[0]
+for i in range(n_filters):
+ if doLog==True:
+ pylab.semilogx(nmat[i,:])
+ else:
+ pylab.plot(nmat[i,:])
+
+
+pylab.hold(False)
+pylab.show()
\ No newline at end of file
--- /dev/null
+++ b/examples/plotmat.py
@@ -1,0 +1,18 @@
+#!/usr/bin/env python
+
+import pylab
+import numpy
+import sys
+
+filename=sys.argv[1]
+
+mat=pylab.load(filename)
+nmat=numpy.array(mat).T
+print numpy.shape(nmat)
+
+
+pylab.matshow(nmat, cmap=pylab.cm.gray, aspect='auto')
+#pylab.imshow(nmat, cmap=pylab.cm.gray, aspect='auto', interpolation=False)
+#pylab.contour(nmat, cmap=pylab.cm.gray, aspect='auto')
+
+pylab.show()
\ No newline at end of file
--- a/examples/utils.c
+++ b/examples/utils.c
@@ -39,8 +39,12 @@
aubio_onsetdetection_type type_onset2 = aubio_onset_complex;
smpl_t threshold = 0.3;
smpl_t silence = -90.;
-uint_t buffer_size = 512; //1024;
-uint_t overlap_size = 256; //512;
+// uint_t buffer_size = 512;
+// uint_t overlap_size = 256;
+uint_t buffer_size = 1024;
+uint_t overlap_size = 512;
+// uint_t buffer_size = 2048;
+// uint_t overlap_size = 1024;
uint_t channels = 1;
uint_t samplerate = 44100;
@@ -60,20 +64,6 @@
int isonset = 0;
aubio_pickpeak_t * parms;
-/* mfcc objects */
-//parameters
-uint_t n_filters=20;
-smpl_t lowfreq=80.f;
-smpl_t highfreq=18000.f;
-// filterbank object
-aubio_filterbank_t * mf;
-
-// DCT mfft and result storage
-aubio_mfft_t * fft_dct;
-cvec_t * fftgrain_dct;
-smpl_t * mfcc_outbuf[11];
-
-
/* pitch objects */
smpl_t pitch = 0.;
aubio_pitchdetection_t * pitchdet;
@@ -313,9 +303,7 @@
obuf = new_fvec(overlap_size, channels);
fftgrain = new_cvec(buffer_size, channels);
- //init for mfcc process
- fftgrain_dct= new_cvec(n_filters, channels);
-
+
if (usepitch) {
pitchdet = new_aubio_pitchdetection(buffer_size*4,
overlap_size, channels, samplerate, type_pitch, mode_pitch);
@@ -329,10 +317,6 @@
/* phase vocoder */
pv = new_aubio_pvoc(buffer_size, overlap_size, channels);
- // dct phase vocoder
- //TODO: check size
- fft_dct = new_aubio_mfft(n_filters, channels);
-
/* onsets */
parms = new_aubio_peakpicker(threshold);
o = new_aubio_onsetdetection(type_onset,buffer_size,channels);
@@ -366,10 +350,6 @@
del_cvec(fftgrain);
del_fvec(onset);
del_fvec(woodblock);
-
- //mffc related
- del_aubio_mfft(fft_dct);
- del_cvec(fftgrain_dct);
aubio_cleanup();
}
--- a/src/filterbank.c
+++ b/src/filterbank.c
@@ -18,14 +18,13 @@
*/
-/* part of this mfcc implementation were inspired from LibXtract
- http://libxtract.sourceforge.net/
-*/
#include "aubio_priv.h"
#include "sample.h"
#include "filterbank.h"
+#include "stdio.h"
+
#define USE_EQUAL_GAIN 1
#define VERY_SMALL_NUMBER 2e-42
@@ -52,7 +51,8 @@
return fb;
}
-aubio_filterbank_t * new_aubio_filterbank_mfcc(uint_t n_filters, uint_t win_s, smpl_t samplerate, smpl_t freq_min, smpl_t freq_max){
+aubio_filterbank_t * new_aubio_filterbank_mfcc(uint_t n_filters, uint_t win_s, uint_t samplerate, smpl_t freq_min, smpl_t freq_max){
+
smpl_t nyquist = samplerate/2.;
uint_t style = 1;
aubio_filterbank_t * fb = new_aubio_filterbank(n_filters, win_s);
@@ -141,6 +141,8 @@
/*zero the rest of the array*/
for(k = next_peak + 1; k < fb->win_s; k++)
fb->filters[n]->data[0][k]=0.f;
+
+
}
free(mel_peak);
@@ -148,10 +150,178 @@
free(height_norm);
free(fft_peak);
+
return fb;
}
+/*
+FB initialization based on Slaney's auditory toolbox
+TODO:
+ *solve memory leak problems while
+ *solve quantization issues when constructing signal:
+ *bug for win_s=512
+ *corrections for win_s=1024 -> why even filters with smaller amplitude
+
+*/
+
+aubio_filterbank_t * new_aubio_filterbank_mfcc2(uint_t n_filters, uint_t win_s, uint_t samplerate, smpl_t freq_min, smpl_t freq_max){
+
+ aubio_filterbank_t * fb = new_aubio_filterbank(n_filters, win_s);
+
+
+ //slaney params
+ smpl_t lowestFrequency = 133.3333;
+ smpl_t linearSpacing = 66.66666666;
+ smpl_t logSpacing = 1.0711703;
+
+ uint_t linearFilters = 13;
+ uint_t logFilters = 27;
+ uint_t allFilters = linearFilters + logFilters;
+
+ //buffers for computing filter frequencies
+ fvec_t * freqs=new_fvec(allFilters+2 , 1);
+
+ fvec_t * lower_freqs=new_fvec( allFilters, 1);
+ fvec_t * upper_freqs=new_fvec( allFilters, 1);
+ fvec_t * center_freqs=new_fvec( allFilters, 1);
+
+
+ fvec_t * triangle_heights=new_fvec( allFilters, 1);
+ //lookup table of each bin frequency in hz
+ fvec_t * fft_freqs=new_fvec(win_s, 1);
+
+ uint_t filter_cnt, bin_cnt;
+
+ //first step: filling all the linear filter frequencies
+ for(filter_cnt=0; filter_cnt<linearFilters; filter_cnt++){
+ freqs->data[0][filter_cnt]=lowestFrequency+ filter_cnt*linearSpacing;
+ }
+ smpl_t lastlinearCF=freqs->data[0][filter_cnt-1];
+
+ //second step: filling all the log filter frequencies
+ for(filter_cnt=0; filter_cnt<logFilters+2; filter_cnt++){
+ freqs->data[0][filter_cnt+linearFilters]=lastlinearCF*(pow(logSpacing,filter_cnt+1));
+ }
+
+ //Option 1. copying interesting values to lower_freqs, center_freqs and upper freqs arrays
+ //TODO: would be nicer to have a reference to freqs->data, anyway we do not care in this init step
+
+ for(filter_cnt=0; filter_cnt<allFilters; filter_cnt++){
+ lower_freqs->data[0][filter_cnt]=freqs->data[0][filter_cnt];
+ center_freqs->data[0][filter_cnt]=freqs->data[0][filter_cnt+1];
+ upper_freqs->data[0][filter_cnt]=freqs->data[0][filter_cnt+2];
+ }
+
+
+ //computing triangle heights so that each triangle has unit area
+ for(filter_cnt=0; filter_cnt<allFilters; filter_cnt++){
+ triangle_heights->data[0][filter_cnt]=2./(upper_freqs->data[0][filter_cnt]-lower_freqs->data[0][filter_cnt]);
+ }
+
+
+ //AUBIO_DBG
+ AUBIO_DBG("filter tables frequencies\n");
+ for(filter_cnt=0; filter_cnt<allFilters; filter_cnt++)
+ AUBIO_DBG("filter n. %d %f %f %f %f\n",filter_cnt, lower_freqs->data[0][filter_cnt], center_freqs->data[0][filter_cnt], upper_freqs->data[0][filter_cnt], triangle_heights->data[0][filter_cnt]);
+
+ //filling the fft_freqs lookup table, which assigns the frequency in hz to each bin
+ for(bin_cnt=0; bin_cnt<win_s; bin_cnt++){
+ //TODO: check the formula!
+ fft_freqs->data[0][bin_cnt]= (smpl_t)samplerate* (smpl_t)bin_cnt/ (smpl_t)win_s;
+ }
+
+ //building each filter table
+ for(filter_cnt=0; filter_cnt<allFilters; filter_cnt++){
+
+ //TODO:check special case : lower freq =0
+ //calculating rise increment in mag/Hz
+ smpl_t riseInc= triangle_heights->data[0][filter_cnt]/(center_freqs->data[0][filter_cnt]-lower_freqs->data[0][filter_cnt]);
+
+
+ AUBIO_DBG("\nfilter %d",filter_cnt);
+ //zeroing begining of filter
+ AUBIO_DBG("\nzero begin\n");
+ for(bin_cnt=0; bin_cnt<win_s-1; bin_cnt++){
+ //zeroing beigining of array
+ fb->filters[filter_cnt]->data[0][bin_cnt]=0.f;
+ AUBIO_DBG(".");
+ //AUBIO_DBG("%f %f %f\n", fft_freqs->data[0][bin_cnt], fft_freqs->data[0][bin_cnt+1], lower_freqs->data[0][filter_cnt]);
+ if(fft_freqs->data[0][bin_cnt]<= lower_freqs->data[0][filter_cnt] && fft_freqs->data[0][bin_cnt+1]> lower_freqs->data[0][filter_cnt]){
+ break;
+ }
+ }
+ bin_cnt++;
+
+ AUBIO_DBG("\npos slope\n");
+ //positive slope
+ for(; bin_cnt<win_s-1; bin_cnt++){
+ AUBIO_DBG(".");
+ fb->filters[filter_cnt]->data[0][bin_cnt]=(fft_freqs->data[0][bin_cnt]-lower_freqs->data[0][filter_cnt])*riseInc;
+ //if(fft_freqs->data[0][bin_cnt]<= center_freqs->data[0][filter_cnt] && fft_freqs->data[0][bin_cnt+1]> center_freqs->data[0][filter_cnt])
+ if(fft_freqs->data[0][bin_cnt+1]> center_freqs->data[0][filter_cnt])
+ break;
+ }
+ //bin_cnt++;
+
+
+ //negative slope
+ AUBIO_DBG("\nneg slope\n");
+ for(; bin_cnt<win_s-1; bin_cnt++){
+ //AUBIO_DBG(".");
+
+ //checking whether last value is less than 0...
+ smpl_t val=triangle_heights->data[0][filter_cnt]-(fft_freqs->data[0][bin_cnt]-center_freqs->data[0][filter_cnt])*riseInc;
+ if(val>=0)
+ fb->filters[filter_cnt]->data[0][bin_cnt]=val;
+ else fb->filters[filter_cnt]->data[0][bin_cnt]=0.f;
+
+ //if(fft_freqs->data[0][bin_cnt]<= upper_freqs->data[0][bin_cnt] && fft_freqs->data[0][bin_cnt+1]> upper_freqs->data[0][filter_cnt])
+ //TODO: CHECK whether bugfix correct
+ if(fft_freqs->data[0][bin_cnt+1]> upper_freqs->data[0][filter_cnt])
+ break;
+ }
+ //bin_cnt++;
+
+ AUBIO_DBG("\nzero end\n");
+ //zeroing tail
+ for(; bin_cnt<win_s; bin_cnt++)
+ //AUBIO_DBG(".");
+ fb->filters[filter_cnt]->data[0][bin_cnt]=0.f;
+
+ }
+
+
+
+ del_fvec(freqs);
+ del_fvec(lower_freqs);
+ del_fvec(upper_freqs);
+ del_fvec(center_freqs);
+
+ del_fvec(triangle_heights);
+ del_fvec(fft_freqs);
+
+ return fb;
+
+}
+
+void aubio_dump_filterbank(aubio_filterbank_t * fb){
+
+ FILE * mlog;
+ mlog=fopen("filterbank.txt","w");
+
+ int k,n;
+ //dumping filter values
+ //smpl_t area_tmp=0.f;
+ for(n = 0; n < fb->n_filters; n++){
+ for(k = 0; k < fb->win_s; k++){
+ fprintf(mlog,"%f ",fb->filters[n]->data[0][k]);
+ }
+ fprintf(mlog,"\n");
+ }
+
+ if(mlog) fclose(mlog);
+}
void del_aubio_filterbank(aubio_filterbank_t * fb){
uint_t filter_cnt;
--- a/src/filterbank.h
+++ b/src/filterbank.h
@@ -45,15 +45,28 @@
/** filterbank initialization for mel filters
- \param nyquist nyquist frequency, i.e. half of the sampling rate
- \param style libxtract style
- \param freqmin lowest filter frequency
- \param freqmax highest filter frequency
+
+ \param n_filters number of filters
+ \param win_s window size
+ \param samplerate
+ \param freq_min lowest filter frequency
+ \param freq_max highest filter frequency
*/
-aubio_filterbank_t * new_aubio_filterbank_mfcc(uint_t n_filters, uint_t win_s, smpl_t samplerate, smpl_t freq_min, smpl_t freq_max);
+aubio_filterbank_t * new_aubio_filterbank_mfcc(uint_t n_filters, uint_t win_s, uint_t samplerate, smpl_t freq_min, smpl_t freq_max);
+/** filterbank initialization for mel filters
+ \param n_filters number of filters
+ \param win_s window size
+ \param samplerate
+ \param freq_min lowest filter frequency
+ \param freq_max highest filter frequency
+
+*/
+aubio_filterbank_t * new_aubio_filterbank_mfcc_2(uint_t n_filters, uint_t win_s, uint_t samplerate, smpl_t freq_min, smpl_t freq_max);
+
+
/** destroy filterbank object
\param fb filterbank, as returned by new_aubio_filterbank method
@@ -65,6 +78,11 @@
*/
void aubio_filterbank_do(aubio_filterbank_t * fb, cvec_t * in, fvec_t *out);
+
+/** dump filterbank filter tables in a txt file
+
+*/
+void aubio_dump_filterbank(aubio_filterbank_t * fb);
#ifdef __cplusplus
}
--- a/src/mfcc.c
+++ b/src/mfcc.c
@@ -33,7 +33,8 @@
uint_t win_s; /** grain length */
uint_t samplerate; /** sample rate (needed?) */
uint_t channels; /** number of channels */
- uint_t n_coefs; /** number of coefficients (= fb->n_filters/2 +1) */
+ uint_t n_filters; /** number of *filters */
+ uint_t n_coefs; /** number of coefficients (<= n_filters/2 +1) */
smpl_t lowfreq; /** lowest frequency for filters */
smpl_t highfreq; /** highest frequency for filters */
aubio_filterbank_t * fb; /** filter bank */
@@ -43,22 +44,24 @@
};
-aubio_mfcc_t * new_aubio_mfcc (uint_t win_s, uint_t samplerate ,uint_t n_coefs, smpl_t lowfreq, smpl_t highfreq, uint_t channels){
+aubio_mfcc_t * new_aubio_mfcc (uint_t win_s, uint_t samplerate, uint_t n_filters, uint_t n_coefs, smpl_t lowfreq, smpl_t highfreq, uint_t channels){
/** allocating space for mfcc object */
aubio_mfcc_t * mfcc = AUBIO_NEW(aubio_mfcc_t);
//we need (n_coefs-1)*2 filters to obtain n_coefs coefficients after dct
- uint_t n_filters = (n_coefs-1)*2;
+ //uint_t n_filters = (n_coefs-1)*2;
mfcc->win_s=win_s;
mfcc->samplerate=samplerate;
mfcc->channels=channels;
+ mfcc->n_filters=n_filters;
mfcc->n_coefs=n_coefs;
mfcc->lowfreq=lowfreq;
mfcc->highfreq=highfreq;
+
/** filterbank allocation */
- mfcc->fb = new_aubio_filterbank_mfcc(n_filters, mfcc->win_s, samplerate, lowfreq, highfreq);
+ mfcc->fb = new_aubio_filterbank_mfcc2(n_filters, mfcc->win_s, samplerate, lowfreq, highfreq);
/** allocating space for fft object (used for dct) */
mfcc->fft_dct=new_aubio_mfft(n_filters, 1);
@@ -101,11 +104,17 @@
//compute mag spectrum
aubio_mfft_do (mf->fft_dct, in, mf->fftgrain_dct);
//extract real part of fft grain
- //for(i=0; i<mf->n_coefs ;i++){
- for(i=0; i<out->length;i++){
+ for(i=0; i<mf->n_coefs ;i++){
+ //for(i=0; i<out->length;i++){
out->data[0][i]= mf->fftgrain_dct->norm[0][i]
*COS(mf->fftgrain_dct->phas[0][i]);
}
return;
+}
+
+//a bit a kludge
+void dump_filterbank(aubio_mfcc_t * mf){
+ aubio_dump_filterbank(mf->fb);
+
}
--- a/src/mfcc.h
+++ b/src/mfcc.h
@@ -44,7 +44,7 @@
\param channels number of channels
*/
-aubio_mfcc_t * new_aubio_mfcc (uint_t win_s, uint_t samplerate ,uint_t n_coefs, smpl_t lowfreq, smpl_t highfreq, uint_t channels);
+aubio_mfcc_t * new_aubio_mfcc (uint_t win_s, uint_t samplerate, uint_t n_filters, uint_t n_coefs, smpl_t lowfreq, smpl_t highfreq, uint_t channels);
/** delete mfcc object
\param mf mfcc object as returned by new_aubio_mfcc
@@ -68,6 +68,13 @@
*/
void aubio_dct_do(aubio_mfcc_t * mf, fvec_t *in, fvec_t *out);
+
+/** dump filterbank to log file
+
+ \param mf mfcc object as returned by new_aubio_mfcc
+
+*/
+void dump_filterbank(aubio_mfcc_t * mf);
#ifdef __cplusplus
}
--- a/src/sample.c
+++ b/src/sample.c
@@ -20,6 +20,7 @@
#include "aubio_priv.h"
#include "sample.h"
+
fvec_t * new_fvec( uint_t length, uint_t channels) {
fvec_t * s = AUBIO_NEW(fvec_t);
uint_t i,j;
--- a/swig/aubio.i
+++ b/swig/aubio.i
@@ -148,6 +148,14 @@
smpl_t aubio_zero_crossing_rate(fvec_t * input);
smpl_t aubio_spectral_centroid(cvec_t * spectrum, smpl_t samplerate);
+/* filterbank */
+
+/* mfcc */
+aubio_mfcc_t * new_aubio_mfcc (uint_t win_s, uint_t samplerate, uint_t n_filters, uint_t n_coefs, smpl_t lowfreq, smpl_t highfreq, uint_t channels);
+void del_aubio_mfcc(aubio_mfcc_t *mf);
+void aubio_mfcc_do(aubio_mfcc_t *mf, cvec_t *in, fvec_t *out);
+
+
/* scale */
extern aubio_scale_t * new_aubio_scale(smpl_t flow, smpl_t fhig, smpl_t ilow, smpl_t ihig );
extern void aubio_scale_set (aubio_scale_t *s, smpl_t ilow, smpl_t ihig, smpl_t olow, smpl_t ohig);