ref: 37942649cc889816d53e54fd98f4bebc279efd63
parent: 7cdc5a34a436f52309e60a54a57244dcf0c7c1f5
author: Jean-Marc Valin <[email protected]>
date: Sat Mar 1 17:55:27 EST 2008
Saved 4 kB of stack usage in find_spectral_pitch() by doing the FFT in-place (sort of)
--- a/libcelt/kiss_fft.c
+++ b/libcelt/kiss_fft.c
@@ -510,7 +510,7 @@
}
}
-static
+
void kf_work(
kiss_fft_cpx * Fout,
const kiss_fft_cpx * f,
--- a/libcelt/kiss_fft.h
+++ b/libcelt/kiss_fft.h
@@ -85,6 +85,9 @@
kiss_fft_cfg kiss_fft_alloc(int nfft,void * mem,size_t * lenmem);
+void kf_work(kiss_fft_cpx * Fout,const kiss_fft_cpx * f,const size_t fstride,
+ int in_stride,int * factors,const kiss_fft_cfg st,int N,int s2,int m2);
+
/** Internal function. Can be useful when you want to do the bit-reversing yourself */
void ki_work(kiss_fft_cpx * Fout, const kiss_fft_cpx * f, const size_t fstride,
int in_stride,int * factors,const kiss_fft_cfg st,int N,int s2,int m2);
--- a/libcelt/kiss_fftr.c
+++ b/libcelt/kiss_fftr.c
@@ -24,13 +24,6 @@
#include "kiss_fftr.h"
#include "_kiss_fft_guts.h"
-struct kiss_fftr_state{
- kiss_fft_cfg substate;
- kiss_twiddle_cpx * super_twiddles;
-#ifdef USE_SIMD
- long pad;
-#endif
-};
kiss_fftr_cfg kiss_fftr_alloc(int nfft,void * mem,size_t * lenmem)
{
@@ -80,7 +73,7 @@
return st;
}
-void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_scalar *freqdata)
+void kiss_fftr_twiddles(kiss_fftr_cfg st,kiss_fft_scalar *freqdata)
{
/* input buffer timedata is stored row-wise */
int k,ncfft;
@@ -88,8 +81,6 @@
ncfft = st->substate->nfft;
- /*perform the parallel fft of two real signals packed in real,imag*/
- kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, (kiss_fft_cpx *)freqdata );
/* The real part of the DC element of the frequency spectrum in st->tmpbuf
* contains the sum of the even-numbered elements of the input time sequence
* The imag part is the sum of the odd-numbered elements
@@ -124,6 +115,14 @@
freqdata[2*(ncfft-k)+1] = HALF_OF(tw.i - f1k.i);
}
+}
+
+void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_scalar *freqdata)
+{
+ /*perform the parallel fft of two real signals packed in real,imag*/
+ kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, (kiss_fft_cpx *)freqdata );
+
+ kiss_fftr_twiddles(st,freqdata);
}
void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_scalar *freqdata,kiss_fft_scalar *timedata)
--- a/libcelt/kiss_fftr.h
+++ b/libcelt/kiss_fftr.h
@@ -15,6 +15,14 @@
*/
+struct kiss_fftr_state{
+ kiss_fft_cfg substate;
+ kiss_twiddle_cpx * super_twiddles;
+#ifdef USE_SIMD
+ long pad;
+#endif
+ };
+
typedef struct kiss_fftr_state *kiss_fftr_cfg;
--- a/libcelt/pitch.c
+++ b/libcelt/pitch.c
@@ -43,12 +43,13 @@
#include <math.h>
#include "pitch.h"
#include "psy.h"
+#include "_kiss_fft_guts.h"
+#include "kiss_fftr.h"
void find_spectral_pitch(kiss_fftr_cfg fft, struct PsyDecay *decay, celt_sig_t *x, celt_sig_t *y, int lag, int len, int C, int *pitch)
{
int c, i;
float max_corr;
- VARDECL(celt_word32_t *xx);
VARDECL(celt_word32_t *X);
VARDECL(celt_word32_t *Y);
VARDECL(celt_mask_t *curve);
@@ -55,30 +56,39 @@
int n2;
SAVE_STACK;
n2 = lag/2;
- ALLOC(xx, lag, celt_word32_t);
ALLOC(X, lag, celt_word32_t);
ALLOC(curve, n2, celt_mask_t);
-
+
for (i=0;i<lag;i++)
- xx[i] = 0;
+ X[i] = 0;
for (c=0;c<C;c++)
- for (i=0;i<len;i++)
- xx[i] += SHR32(x[C*i+c],1);
-
- kiss_fftr(fft, xx, X);
-
+ {
+ for (i=0;i<len/2;i++)
+ {
+ X[2*fft->substate->bitrev[i]] += SHR32(x[C*(2*i)+c],1);
+ X[2*fft->substate->bitrev[i]+1] += SHR32(x[C*(2*i+1)+c],1);
+ }
+ }
+ kf_work((kiss_fft_cpx*)X, NULL, 1,1, fft->substate->factors,fft->substate, 1, 1, 1);
+ kiss_fftr_twiddles(fft,X);
+
compute_masking(decay, X, curve, lag);
/* Deferred allocation to reduce peak stack usage */
ALLOC(Y, lag, celt_word32_t);
for (i=0;i<lag;i++)
- xx[i] = 0;
+ Y[i] = 0;
for (c=0;c<C;c++)
- for (i=0;i<lag;i++)
- xx[i] += SHR32(y[C*i+c],1);
- kiss_fftr(fft, xx, Y);
-
-
+ {
+ for (i=0;i<lag/2;i++)
+ {
+ Y[2*fft->substate->bitrev[i]] += SHR32(y[C*(2*i)+c],1);
+ Y[2*fft->substate->bitrev[i]+1] += SHR32(y[C*(2*i+1)+c],1);
+ }
+ }
+ kf_work((kiss_fft_cpx*)Y, NULL, 1,1, fft->substate->factors,fft->substate, 1, 1, 1);
+ kiss_fftr_twiddles(fft,Y);
+
for (i=1;i<n2;i++)
{
float n, tmp;
@@ -93,7 +103,7 @@
}
/*printf ("\n");*/
X[0] = X[1] = 0;
- kiss_fftri(fft, X, xx);
+ kiss_fftri(fft, X, Y);
/*for (i=0;i<C*lag;i++)
printf ("%d %d\n", X[i], xx[i]);*/
@@ -102,10 +112,10 @@
for (i=0;i<lag-len;i++)
{
/*printf ("%f ", xx[i]);*/
- if (xx[i] > max_corr)
+ if (Y[i] > max_corr)
{
*pitch = i;
- max_corr = xx[i];
+ max_corr = Y[i];
}
}
/*printf ("%f\n", max_corr);*/