shithub: opus

Download patch

ref: 4c1a90a847a2af528cbfe6924a85ba8173e5c4f9
parent: cc344fb8ff9649a13d7628c38137f2f65cd65ec4
author: Jean-Marc Valin <[email protected]>
date: Sat Dec 28 18:14:26 EST 2013

Getting rid of some negations

Since we're doing two rotations, we can invert the sign on both: the two sign changes cancel out.
Also adding a few comments for optimizing the FFT.

--- a/celt/kiss_fft.c
+++ b/celt/kiss_fft.c
@@ -63,6 +63,7 @@
       Fout = Fout_beg + i*mm;
       Fout2 = Fout + m;
       tw1 = st->twiddles;
+      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
       for(j=0;j<m;j++)
       {
          kiss_fft_cpx t;
@@ -118,6 +119,8 @@
       {
          Fout = Fout_beg + i*mm;
          tw3 = tw2 = tw1 = st->twiddles;
+         /* For non-custom modes, m=4, otherwise m is guaranteed to be a
+            multiple of 4. */
          for (j=0;j<m;j++)
          {
             C_MUL(scratch[0],Fout[m] , *tw1 );
@@ -169,6 +172,7 @@
    {
       Fout = Fout_beg + i*mm;
       tw1=tw2=st->twiddles;
+      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
       k=m;
       do {
 
@@ -229,6 +233,7 @@
       Fout3=Fout0+3*m;
       Fout4=Fout0+4*m;
 
+      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
       for ( u=0; u<m; ++u ) {
          scratch[0] = *Fout0;
 
--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -202,8 +202,8 @@
          t1 = t[N4+i];
          re = *yp++;
          im = *yp++;
-         yr = -S_MUL(re,t0)  +  S_MUL(im,t1);
-         yi = -S_MUL(im,t0)  -  S_MUL(re,t1);
+         yr = S_MUL(re,t0)  -  S_MUL(im,t1);
+         yi = S_MUL(im,t0)  +  S_MUL(re,t1);
          yc.r = yr;
          yc.i = yi;
          yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
@@ -226,8 +226,8 @@
       for(i=0;i<N4;i++)
       {
          kiss_fft_scalar yr, yi;
-         yr = -S_MUL(fp->i,t[N4+i]) + S_MUL(fp->r,t[i]);
-         yi = -S_MUL(fp->r,t[N4+i]) - S_MUL(fp->i,t[i]);
+         yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
+         yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
          *yp1 = yr;
          *yp2 = yi;
          fp++;
@@ -268,8 +268,8 @@
          int rev;
          kiss_fft_scalar yr, yi;
          rev = *bitrev++;
-         yr = -S_MUL(*xp2, t[i]) - S_MUL(*xp1,t[N4+i]);
-         yi =  S_MUL(*xp2, t[N4+i]) - S_MUL(*xp1,t[i]);
+         yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
+         yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
          /* We swap real and imag because we use an FFT instead of an IFFT. */
          yp[2*rev+1] = yr;
          yp[2*rev] = yi;
@@ -300,11 +300,11 @@
          t1 = t[N4+i];
          /* We'd scale up by 2 here, but instead it's done when mixing the windows */
          yr = S_MUL(re,t0) + S_MUL(im,t1);
-         yi = S_MUL(im,t0) - S_MUL(re,t1);
+         yi = S_MUL(re,t1) - S_MUL(im,t0);
          /* We swap real and imag because we're using an FFT instead of an IFFT. */
          re = yp1[1];
          im = yp1[0];
-         yp0[0] = -yr;
+         yp0[0] = yr;
          yp1[1] = yi;
 
          t0 = t[(N4-i-1)];
@@ -311,8 +311,8 @@
          t1 = t[(N2-i-1)];
          /* We'd scale up by 2 here, but instead it's done when mixing the windows */
          yr = S_MUL(re,t0) + S_MUL(im,t1);
-         yi = S_MUL(im,t0) - S_MUL(re,t1);
-         yp1[0] = -yr;
+         yi = S_MUL(re,t1) - S_MUL(im,t0);
+         yp1[0] = yr;
          yp0[1] = yi;
          yp0 += 2;
          yp1 -= 2;