ref: 4c1a90a847a2af528cbfe6924a85ba8173e5c4f9
parent: cc344fb8ff9649a13d7628c38137f2f65cd65ec4
author: Jean-Marc Valin <[email protected]>
date: Sat Dec 28 18:14:26 EST 2013
Getting rid of some negations. Since we're doing two rotations, we can invert the sign on both. Also adding a few comments for optimizing the FFT.
--- a/celt/kiss_fft.c
+++ b/celt/kiss_fft.c
@@ -63,6 +63,7 @@
Fout = Fout_beg + i*mm;
Fout2 = Fout + m;
tw1 = st->twiddles;
+ /* For non-custom modes, m is guaranteed to be a multiple of 4. */
for(j=0;j<m;j++)
{
kiss_fft_cpx t;
@@ -118,6 +119,8 @@
{
Fout = Fout_beg + i*mm;
tw3 = tw2 = tw1 = st->twiddles;
+ /* For non-custom modes, m=4, otherwise m is guaranteed to be a
+ multiple of 4. */
for (j=0;j<m;j++)
{
C_MUL(scratch[0],Fout[m] , *tw1 );
@@ -169,6 +172,7 @@
{
Fout = Fout_beg + i*mm;
tw1=tw2=st->twiddles;
+ /* For non-custom modes, m is guaranteed to be a multiple of 4. */
k=m;
do {
@@ -229,6 +233,7 @@
Fout3=Fout0+3*m;
Fout4=Fout0+4*m;
+ /* For non-custom modes, m is guaranteed to be a multiple of 4. */
for ( u=0; u<m; ++u ) {
scratch[0] = *Fout0;
--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -202,8 +202,8 @@
t1 = t[N4+i];
re = *yp++;
im = *yp++;
- yr = -S_MUL(re,t0) + S_MUL(im,t1);
- yi = -S_MUL(im,t0) - S_MUL(re,t1);
+ yr = S_MUL(re,t0) - S_MUL(im,t1);
+ yi = S_MUL(im,t0) + S_MUL(re,t1);
yc.r = yr;
yc.i = yi;
yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
@@ -226,8 +226,8 @@
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
- yr = -S_MUL(fp->i,t[N4+i]) + S_MUL(fp->r,t[i]);
- yi = -S_MUL(fp->r,t[N4+i]) - S_MUL(fp->i,t[i]);
+ yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
+ yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
*yp1 = yr;
*yp2 = yi;
fp++;
@@ -268,8 +268,8 @@
int rev;
kiss_fft_scalar yr, yi;
rev = *bitrev++;
- yr = -S_MUL(*xp2, t[i]) - S_MUL(*xp1,t[N4+i]);
- yi = S_MUL(*xp2, t[N4+i]) - S_MUL(*xp1,t[i]);
+ yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
+ yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
/* We swap real and imag because we use an FFT instead of an IFFT. */
yp[2*rev+1] = yr;
yp[2*rev] = yi;
@@ -300,11 +300,11 @@
t1 = t[N4+i];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) + S_MUL(im,t1);
- yi = S_MUL(im,t0) - S_MUL(re,t1);
+ yi = S_MUL(re,t1) - S_MUL(im,t0);
/* We swap real and imag because we're using an FFT instead of an IFFT. */
re = yp1[1];
im = yp1[0];
- yp0[0] = -yr;
+ yp0[0] = yr;
yp1[1] = yi;
t0 = t[(N4-i-1)];
@@ -311,8 +311,8 @@
t1 = t[(N2-i-1)];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) + S_MUL(im,t1);
- yi = S_MUL(im,t0) - S_MUL(re,t1);
- yp1[0] = -yr;
+ yi = S_MUL(re,t1) - S_MUL(im,t0);
+ yp1[0] = yr;
yp0[1] = yi;
yp0 += 2;
yp1 -= 2;