shithub: opus

Download patch

ref: 37942649cc889816d53e54fd98f4bebc279efd63
parent: 7cdc5a34a436f52309e60a54a57244dcf0c7c1f5
author: Jean-Marc Valin <[email protected]>
date: Sat Mar 1 17:55:27 EST 2008

Saved 4 kB of stack usage in find_spectral_pitch() by doing the FFT in-place
(sort of)

--- a/libcelt/kiss_fft.c
+++ b/libcelt/kiss_fft.c
@@ -510,7 +510,7 @@
    }
 }
 
-static
+
 void kf_work(
         kiss_fft_cpx * Fout,
         const kiss_fft_cpx * f,
--- a/libcelt/kiss_fft.h
+++ b/libcelt/kiss_fft.h
@@ -85,6 +85,9 @@
 
 kiss_fft_cfg kiss_fft_alloc(int nfft,void * mem,size_t * lenmem); 
 
+void kf_work(kiss_fft_cpx * Fout,const kiss_fft_cpx * f,const size_t fstride,
+             int in_stride,int * factors,const kiss_fft_cfg st,int N,int s2,int m2);
+
 /** Internal function. Can be useful when you want to do the bit-reversing yourself */
 void ki_work(kiss_fft_cpx * Fout, const kiss_fft_cpx * f, const size_t fstride,
              int in_stride,int * factors,const kiss_fft_cfg st,int N,int s2,int m2);
--- a/libcelt/kiss_fftr.c
+++ b/libcelt/kiss_fftr.c
@@ -24,13 +24,6 @@
 #include "kiss_fftr.h"
 #include "_kiss_fft_guts.h"
 
-struct kiss_fftr_state{
-    kiss_fft_cfg substate;
-    kiss_twiddle_cpx * super_twiddles;
-#ifdef USE_SIMD    
-    long pad;
-#endif    
-};
 
 kiss_fftr_cfg kiss_fftr_alloc(int nfft,void * mem,size_t * lenmem)
 {
@@ -80,7 +73,7 @@
     return st;
 }
 
-void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_scalar *freqdata)
+void kiss_fftr_twiddles(kiss_fftr_cfg st,kiss_fft_scalar *freqdata)
 {
    /* input buffer timedata is stored row-wise */
    int k,ncfft;
@@ -88,8 +81,6 @@
 
    ncfft = st->substate->nfft;
 
-   /*perform the parallel fft of two real signals packed in real,imag*/
-   kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, (kiss_fft_cpx *)freqdata );
     /* The real part of the DC element of the frequency spectrum in st->tmpbuf
    * contains the sum of the even-numbered elements of the input time sequence
    * The imag part is the sum of the odd-numbered elements
@@ -124,6 +115,14 @@
       freqdata[2*(ncfft-k)+1] = HALF_OF(tw.i - f1k.i);
 
    }
+}
+
+void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_scalar *freqdata)
+{
+   /*perform the parallel fft of two real signals packed in real,imag*/
+   kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, (kiss_fft_cpx *)freqdata );
+
+   kiss_fftr_twiddles(st,freqdata);
 }
 
 void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_scalar *freqdata,kiss_fft_scalar *timedata)
--- a/libcelt/kiss_fftr.h
+++ b/libcelt/kiss_fftr.h
@@ -15,6 +15,14 @@
  
  */
 
+struct kiss_fftr_state{
+      kiss_fft_cfg substate;
+      kiss_twiddle_cpx * super_twiddles;
+#ifdef USE_SIMD    
+      long pad;
+#endif    
+   };
+
 typedef struct kiss_fftr_state *kiss_fftr_cfg;
 
 
--- a/libcelt/pitch.c
+++ b/libcelt/pitch.c
@@ -43,12 +43,13 @@
 #include <math.h>
 #include "pitch.h"
 #include "psy.h"
+#include "_kiss_fft_guts.h"
+#include "kiss_fftr.h"
 
 void find_spectral_pitch(kiss_fftr_cfg fft, struct PsyDecay *decay, celt_sig_t *x, celt_sig_t *y, int lag, int len, int C, int *pitch)
 {
    int c, i;
    float max_corr;
-   VARDECL(celt_word32_t *xx);
    VARDECL(celt_word32_t *X);
    VARDECL(celt_word32_t *Y);
    VARDECL(celt_mask_t *curve);
@@ -55,30 +56,39 @@
    int n2;
    SAVE_STACK;
    n2 = lag/2;
-   ALLOC(xx, lag, celt_word32_t);
    ALLOC(X, lag, celt_word32_t);
    ALLOC(curve, n2, celt_mask_t);
-   
+
    for (i=0;i<lag;i++)
-      xx[i] = 0;
+      X[i] = 0;
    for (c=0;c<C;c++)
-      for (i=0;i<len;i++)
-         xx[i] += SHR32(x[C*i+c],1);
-   
-   kiss_fftr(fft, xx, X);
-   
+   {
+      for (i=0;i<len/2;i++)
+      {
+         X[2*fft->substate->bitrev[i]] += SHR32(x[C*(2*i)+c],1);
+         X[2*fft->substate->bitrev[i]+1] += SHR32(x[C*(2*i+1)+c],1);
+      }
+   }
+   kf_work((kiss_fft_cpx*)X, NULL, 1,1, fft->substate->factors,fft->substate, 1, 1, 1);
+   kiss_fftr_twiddles(fft,X);
+
    compute_masking(decay, X, curve, lag);
 
    /* Deferred allocation to reduce peak stack usage */
    ALLOC(Y, lag, celt_word32_t);
    for (i=0;i<lag;i++)
-      xx[i] = 0;
+      Y[i] = 0;
    for (c=0;c<C;c++)
-      for (i=0;i<lag;i++)
-         xx[i] += SHR32(y[C*i+c],1);
-   kiss_fftr(fft, xx, Y);
-   
-   
+   {
+      for (i=0;i<lag/2;i++)
+      {
+         Y[2*fft->substate->bitrev[i]] += SHR32(y[C*(2*i)+c],1);
+         Y[2*fft->substate->bitrev[i]+1] += SHR32(y[C*(2*i+1)+c],1);
+      }
+   }
+   kf_work((kiss_fft_cpx*)Y, NULL, 1,1, fft->substate->factors,fft->substate, 1, 1, 1);
+   kiss_fftr_twiddles(fft,Y);
+
    for (i=1;i<n2;i++)
    {
       float n, tmp;
@@ -93,7 +103,7 @@
    }
    /*printf ("\n");*/
    X[0] = X[1] = 0;
-   kiss_fftri(fft, X, xx);
+   kiss_fftri(fft, X, Y);
    /*for (i=0;i<C*lag;i++)
       printf ("%d %d\n", X[i], xx[i]);*/
    
@@ -102,10 +112,10 @@
    for (i=0;i<lag-len;i++)
    {
       /*printf ("%f ", xx[i]);*/
-      if (xx[i] > max_corr)
+      if (Y[i] > max_corr)
       {
          *pitch = i;
-         max_corr = xx[i];
+         max_corr = Y[i];
       }
    }
    /*printf ("%f\n", max_corr);*/