shithub: aacdec

ref: 91082ce9a12c7bceaef1b806a8cffa3781699d59
dir: /libfaad/cfft.c/

View raw version
/*
** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding
** Copyright (C) 2003 M. Bakker, Ahead Software AG, http://www.nero.com
**  
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
** 
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
** 
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software 
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**
** Any non-GPL usage of this software or parts of this software is strictly
** forbidden.
**
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through [email protected].
**
** $Id: cfft.c,v 1.17 2003/11/04 21:43:30 menno Exp $
**/

/*
 * Algorithmically based on Fortran-77 FFTPACK
 * by Paul N. Swarztrauber(Version 4, 1985).
 *
 * Does even sized fft only
 */

/* isign is +1 for backward and -1 for forward transforms */

#include "common.h"
#include "structs.h"

#include <stdlib.h>

#include "cfft.h"
#include "cfft_tab.h"


/*----------------------------------------------------------------------
   passf2, passf3, passf4, passf5. Complex FFT passes fwd and bwd.
  ----------------------------------------------------------------------*/

static void passf2(const uint16_t ido, const uint16_t l1, const complex_t *cc,
                   complex_t *ch, const complex_t *wa, const int8_t isign)
{
    uint16_t i, k, ah, ac;

    if (ido == 1)
    {
        for (k = 0; k < l1; k++)
        {
            ah = 2*k;
            ac = 4*k;

            RE(ch[ah])    = RE(cc[ac]) + RE(cc[ac+1]);
            RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]);
            IM(ch[ah])    = IM(cc[ac]) + IM(cc[ac+1]);
            IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]);
        }
    } else {
        for (k = 0; k < l1; k++)
        {
            ah = k*ido;
            ac = 2*k*ido;

            for (i = 0; i < ido; i++)
            {
                complex_t t2;

                RE(ch[ah+i]) = RE(cc[ac+i]) + RE(cc[ac+i+ido]);
                RE(t2)       = RE(cc[ac+i]) - RE(cc[ac+i+ido]);

                IM(ch[ah+i]) = IM(cc[ac+i]) + IM(cc[ac+i+ido]);
                IM(t2)       = IM(cc[ac+i]) - IM(cc[ac+i+ido]);

                RE(ch[ah+i+l1*ido]) = MUL_R_C(RE(t2),RE(wa[i])) - MUL_R_C(IM(t2),IM(wa[i]))*isign;
                IM(ch[ah+i+l1*ido]) = MUL_R_C(IM(t2),RE(wa[i])) + MUL_R_C(RE(t2),IM(wa[i]))*isign;
            }
        }
    }
}


static void passf3(const uint16_t ido, const uint16_t l1, const complex_t *cc,
                   complex_t *ch, const complex_t *wa1, const complex_t *wa2,
                   const int8_t isign)
{
    static real_t taur = COEF_CONST(-0.5);
    static real_t taui = COEF_CONST(0.866025403784439);
    uint16_t i, k, ac, ah;
    complex_t c2, c3, d2, d3, t2;

    if (ido == 1)
    {
        for (k = 0; k < l1; k++)
        {
            ac = 3*k+1;
            ah = k;

            RE(t2) = RE(cc[ac]) + RE(cc[ac+1]);
            IM(t2) = IM(cc[ac]) + IM(cc[ac+1]);
            RE(c2) = RE(cc[ac-1]) + MUL_R_C(RE(t2),taur);
            IM(c2) = IM(cc[ac-1]) + MUL_R_C(IM(t2),taur);

            RE(ch[ah]) = RE(cc[ac-1]) + RE(t2);
            IM(ch[ah]) = IM(cc[ac-1]) + IM(t2);

            RE(c3) = MUL_R_C((RE(cc[ac]) - RE(cc[ac+1])), taui)*isign;
            IM(c3) = MUL_R_C((IM(cc[ac]) - IM(cc[ac+1])), taui)*isign;

            RE(ch[ah+l1]) = RE(c2) - IM(c3);
            IM(ch[ah+l1]) = IM(c2) + RE(c3);
            RE(ch[ah+2*l1]) = RE(c2) + IM(c3);
            IM(ch[ah+2*l1]) = IM(c2) - RE(c3);
        }
    } else {
        for (k = 0; k < l1; k++)
        {
            for (i = 0; i < ido; i++)
            {
                ac = i + (3*k+1)*ido;
                ah = i + k * ido;

                RE(t2) = RE(cc[ac]) + RE(cc[ac+ido]);
                RE(c2) = RE(cc[ac-ido]) + MUL_R_C(RE(t2),taur);
                IM(t2) = IM(cc[ac]) + IM(cc[ac+ido]);
                IM(c2) = IM(cc[ac-ido]) + MUL_R_C(IM(t2),taur);

                RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2);
                IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2);

                RE(c3) = MUL_R_C((RE(cc[ac]) - RE(cc[ac+ido])), taui)*isign;
                IM(c3) = MUL_R_C((IM(cc[ac]) - IM(cc[ac+ido])), taui)*isign;

                RE(d2) = RE(c2) - IM(c3);
                IM(d3) = IM(c2) - RE(c3);
                RE(d3) = RE(c2) + IM(c3);
                IM(d2) = IM(c2) + RE(c3);

                RE(ch[ah+l1*ido]) = MUL_R_C(RE(d2),RE(wa1[i])) - MUL_R_C(IM(d2),IM(wa1[i]))*isign;
                IM(ch[ah+l1*ido]) = MUL_R_C(IM(d2),RE(wa1[i])) + MUL_R_C(RE(d2),IM(wa1[i]))*isign;
                RE(ch[ah+l1*2*ido]) = MUL_R_C(RE(d3),RE(wa2[i])) - MUL_R_C(IM(d3),IM(wa2[i]))*isign;
                IM(ch[ah+l1*2*ido]) = MUL_R_C(IM(d3),RE(wa2[i])) + MUL_R_C(RE(d3),IM(wa2[i]))*isign;
            }
        }
    }
}

#if 0
typedef real_t simd_complex_t[4];

/*
  complex_add_sub(c1, c2, a1, a2);
  complex_mult(a1, c1, w0);
  complex_mult(a2, c2, w2);
*/
static INLINE void complex_func(simd_complex_t a1, simd_complex_t a2,
                                const simd_complex_t z1, const simd_complex_t z2,
                                const simd_complex_t w1, const simd_complex_t w2)
{
    __asm {
        mov eax, a1
        mov ebx, a2
        movups xmm0, [eax]
        movups xmm2, [ebx]
        movups xmm4, [eax]
        addps xmm0, xmm2   ; xmm0 = c1
        subps xmm4, xmm2   ; xmm4 = c2


        ; complex mult
        mov ecx, w1
        movups xmm1, [ecx]
        movups xmm2, xmm0
        movups xmm3, xmm1

        mulps xmm0, xmm1

        shufps xmm2, xmm2, 0xB1
        shufps xmm0, xmm0, 0xD8

        mulps xmm2, xmm3

        movhlps xmm1, xmm0
        shufps xmm2, xmm2, 0xD8

        subps xmm0, xmm1
        movhlps xmm3, xmm2
        addps xmm2, xmm3

        unpcklps xmm0, xmm2
        movups [eax], xmm0

        ; complex mult
        mov ecx, w2
        movups xmm1, [ecx]
        movups xmm2, xmm4
        movups xmm3, xmm1

        mulps xmm4, xmm1

        shufps xmm2, xmm2, 0xB1
        shufps xmm4, xmm4, 0xD8

        mulps xmm2, xmm3

        movhlps xmm1, xmm4
        shufps xmm2, xmm2, 0xD8

        subps xmm4, xmm1
        movhlps xmm3, xmm2
        addps xmm2, xmm3

        unpcklps xmm4, xmm2
        movups [ebx], xmm4
    }
}

/* complex a = z1*z2 */
static INLINE void complex_mult(simd_complex_t a, const simd_complex_t z1, const simd_complex_t z2)
{
#if 0
    a[0] = MUL_R_C(z1[0],z2[0]) - MUL_R_C(z1[1],z2[1]);
    a[1] = MUL_R_C(z1[1],z2[0]) + MUL_R_C(z1[0],z2[1]);

    a[2] = MUL_R_C(z1[2],z2[2]) - MUL_R_C(z1[3],z2[3]);
    a[3] = MUL_R_C(z1[3],z2[2]) + MUL_R_C(z1[2],z2[3]);
#else
    __asm {
        mov eax, z1
        mov ecx, z2
        mov edx, a

        movups xmm0, [eax]
        movups xmm1, [ecx]
        movaps xmm2, xmm0
        movaps xmm3, xmm1

        mulps xmm0, xmm1

        shufps xmm2, xmm2, 0xB1
        shufps xmm0, xmm0, 0xD8

        mulps xmm2, xmm3

        movhlps xmm1, xmm0
        shufps xmm2, xmm2, 0xD8

        subps xmm0, xmm1
        movhlps xmm3, xmm2
        addps xmm2, xmm3

        unpcklps xmm0, xmm2

        movups [edx], xmm0
    }
#endif
}

/* complex a = z1+z2 */
static void complex_add(complex_t a, const complex_t z1, const complex_t z2)
{
    RE(a) = RE(z1) + RE(z2);
    IM(a) = IM(z1) + IM(z2);
}

/* complex a = z1-z2 */
static void complex_sub(complex_t a, const complex_t z1, const complex_t z2)
{
    RE(a) = RE(z1) - RE(z2);
    IM(a) = IM(z1) - IM(z2);
}

/* complex a1 = z1+z2; a2 = z1-z2 */
static INLINE void complex_add_sub(simd_complex_t a1, simd_complex_t a2,
                            const simd_complex_t z1, const simd_complex_t z2)
{
#if 0
    a1[0] = z1[0] + z2[0];
    a1[1] = z1[1] + z2[1];
    a1[2] = z1[2] + z2[2];
    a1[3] = z1[3] + z2[3];
    a2[0] = z1[0] - z2[0];
    a2[1] = z1[1] - z2[1];
    a2[2] = z1[2] - z2[2];
    a2[3] = z1[3] - z2[3];
#else
    __asm {
        mov eax, DWORD PTR z1
        mov ebx, DWORD PTR z2
        mov ecx, DWORD PTR a1
        mov edx, DWORD PTR a2
        movups xmm1, [eax]
        movups xmm2, [ebx]
        movups xmm3, [eax]
        addps xmm1, xmm2
        subps xmm3, xmm2
        movups [ecx], xmm1
        movups [edx], xmm3
    }
#endif
}

static void passf4(const uint16_t ido, const uint16_t l1, const complex_t *cc,
                   complex_t *ch, const complex_t *wa1, const complex_t *wa2,
                   const complex_t *wa3, const int8_t isign)
{
    uint16_t i, k, ac, ah;

    if (ido == 1)
    {
        for (k = 0; k < l1; k++)
        {
            complex_t t1, t2, t3, t4;

            ac = 4*k;
            ah = k;

            RE(t2) = RE(cc[ac])   + RE(cc[ac+2]);
            RE(t1) = RE(cc[ac])   - RE(cc[ac+2]);
            IM(t2) = IM(cc[ac])   + IM(cc[ac+2]);
            IM(t1) = IM(cc[ac])   - IM(cc[ac+2]);
            RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]);
            IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]);
            IM(t3) = IM(cc[ac+3]) + IM(cc[ac+1]);
            RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]);

            RE(ch[ah])      = RE(t2) + RE(t3);
            RE(ch[ah+2*l1]) = RE(t2) - RE(t3);

            IM(ch[ah])      = IM(t2) + IM(t3);
            IM(ch[ah+2*l1]) = IM(t2) - IM(t3);

            RE(ch[ah+l1])   = RE(t1) + RE(t4)*isign;
            RE(ch[ah+3*l1]) = RE(t1) - RE(t4)*isign;

            IM(ch[ah+l1])   = IM(t1) + IM(t4)*isign;
            IM(ch[ah+3*l1]) = IM(t1) - IM(t4)*isign;
        }
    } else {
        for (k = 0; k < l1; k++)
        {
            ac = 4*k*ido;
            ah = k*ido;

            for (i = 0; i < ido; i++)
            {
                simd_complex_t c1, c2, t1, t2;
                simd_complex_t w0 = {1,0,0,0};
                simd_complex_t w2;

                w0[2] = wa1[i][0]*isign;
                w0[3] = wa1[i][1]*isign;
                w2[0] = wa2[i][0]*isign;
                w2[1] = wa2[i][1]*isign;
                w2[2] = wa3[i][0]*isign;
                w2[3] = wa3[i][1]*isign;

                t1[0] = RE(cc[ac+i]) + RE(cc[ac+i+2*ido]);
                t1[1] = IM(cc[ac+i]) + IM(cc[ac+i+2*ido]);
                t1[2] = RE(cc[ac+i]) - RE(cc[ac+i+2*ido]);
                t1[3] = IM(cc[ac+i]) - IM(cc[ac+i+2*ido]);
                t2[0] = RE(cc[ac+i+ido]) + RE(cc[ac+i+3*ido]);
                t2[3] = RE(cc[ac+i+ido]) - RE(cc[ac+i+3*ido]);
                t2[1] = IM(cc[ac+i+3*ido]) + IM(cc[ac+i+ido]);
                t2[2] = IM(cc[ac+i+3*ido]) - IM(cc[ac+i+ido]);

                t2[2] *= isign;
                t2[3] *= isign;

#if 0
                complex_add_sub(c1, c2, t1, t2);
                complex_mult(t1, c1, w0);
                complex_mult(t2, c2, w2);
#else
                complex_func(t1, t2, c1, c2, w0, w2);
#endif

                RE(ch[ah+i]) = t1[0];
                IM(ch[ah+i]) = t1[1];
                RE(ch[ah+i+l1*ido]) = t1[2];
                IM(ch[ah+i+l1*ido]) = t1[3];
                RE(ch[ah+i+2*l1*ido]) = t2[0];
                IM(ch[ah+i+2*l1*ido]) = t2[1];
                RE(ch[ah+i+3*l1*ido]) = t2[2];
                IM(ch[ah+i+3*l1*ido]) = t2[3];
            }
        }
    }
}
#else
static void passf4(const uint16_t ido, const uint16_t l1, const complex_t *cc,
                   complex_t *ch, const complex_t *wa1, const complex_t *wa2,
                   const complex_t *wa3, const int8_t isign)
{
    uint16_t i, k, ac, ah;

    if (ido == 1)
    {
        for (k = 0; k < l1; k++)
        {
            complex_t t1, t2, t3, t4;

            ac = 4*k;
            ah = k;

            RE(t2) = RE(cc[ac])   + RE(cc[ac+2]);
            RE(t1) = RE(cc[ac])   - RE(cc[ac+2]);
            IM(t2) = IM(cc[ac])   + IM(cc[ac+2]);
            IM(t1) = IM(cc[ac])   - IM(cc[ac+2]);
            RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]);
            IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]);
            IM(t3) = IM(cc[ac+3]) + IM(cc[ac+1]);
            RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]);

            RE(ch[ah])      = RE(t2) + RE(t3);
            RE(ch[ah+2*l1]) = RE(t2) - RE(t3);

            IM(ch[ah])      = IM(t2) + IM(t3);
            IM(ch[ah+2*l1]) = IM(t2) - IM(t3);

            RE(ch[ah+l1])   = RE(t1) + RE(t4)*isign;
            RE(ch[ah+3*l1]) = RE(t1) - RE(t4)*isign;

            IM(ch[ah+l1])   = IM(t1) + IM(t4)*isign;
            IM(ch[ah+3*l1]) = IM(t1) - IM(t4)*isign;
        }
    } else {
        for (k = 0; k < l1; k++)
        {
            ac = 4*k*ido;
            ah = k*ido;

            for (i = 0; i < ido; i++)
            {
                complex_t c2, c3, c4, t1, t2, t3, t4;

                RE(t2) = RE(cc[ac+i]) + RE(cc[ac+i+2*ido]);
                RE(t1) = RE(cc[ac+i]) - RE(cc[ac+i+2*ido]);
                IM(t2) = IM(cc[ac+i]) + IM(cc[ac+i+2*ido]);
                IM(t1) = IM(cc[ac+i]) - IM(cc[ac+i+2*ido]);
                RE(t3) = RE(cc[ac+i+ido]) + RE(cc[ac+i+3*ido]);
                IM(t4) = RE(cc[ac+i+ido]) - RE(cc[ac+i+3*ido]);
                IM(t3) = IM(cc[ac+i+3*ido]) + IM(cc[ac+i+ido]);
                RE(t4) = IM(cc[ac+i+3*ido]) - IM(cc[ac+i+ido]);

                RE(c2) = RE(t1) + RE(t4)*isign;
                RE(c4) = RE(t1) - RE(t4)*isign;

                IM(c2) = IM(t1) + IM(t4)*isign;
                IM(c4) = IM(t1) - IM(t4)*isign;

                RE(ch[ah+i]) = RE(t2) + RE(t3);
                RE(c3)       = RE(t2) - RE(t3);

                IM(ch[ah+i]) = IM(t2) + IM(t3);
                IM(c3)       = IM(t2) - IM(t3);

                IM(ch[ah+i+l1*ido])   = MUL_R_C(IM(c2),RE(wa1[i])) + MUL_R_C(RE(c2),IM(wa1[i]))*isign;
                RE(ch[ah+i+l1*ido])   = MUL_R_C(RE(c2),RE(wa1[i])) - MUL_R_C(IM(c2),IM(wa1[i]))*isign;
                IM(ch[ah+i+2*l1*ido]) = MUL_R_C(IM(c3),RE(wa2[i])) + MUL_R_C(RE(c3),IM(wa2[i]))*isign;
                RE(ch[ah+i+2*l1*ido]) = MUL_R_C(RE(c3),RE(wa2[i])) - MUL_R_C(IM(c3),IM(wa2[i]))*isign;
                IM(ch[ah+i+3*l1*ido]) = MUL_R_C(IM(c4),RE(wa3[i])) + MUL_R_C(RE(c4),IM(wa3[i]))*isign;
                RE(ch[ah+i+3*l1*ido]) = MUL_R_C(RE(c4),RE(wa3[i])) - MUL_R_C(IM(c4),IM(wa3[i]))*isign;
            }
        }
    }
}
#endif

static void passf5(const uint16_t ido, const uint16_t l1, const complex_t *cc,
                   complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3,
                   const complex_t *wa4, const int8_t isign)
{
    static real_t tr11 = COEF_CONST(0.309016994374947);
    static real_t ti11 = COEF_CONST(0.951056516295154);
    static real_t tr12 = COEF_CONST(-0.809016994374947);
    static real_t ti12 = COEF_CONST(0.587785252292473);
    uint16_t i, k, ac, ah;
    complex_t c2, c3, c4, c5, d3, d4, d5, d2, t2, t3, t4, t5;

    if (ido == 1)
    {
        for (k = 0; k < l1; k++)
        {
            ac = 5*k + 1;
            ah = k;

            RE(t2) = RE(cc[ac]) + RE(cc[ac+3]);
            IM(t2) = IM(cc[ac]) + IM(cc[ac+3]);
            RE(t3) = RE(cc[ac+1]) + RE(cc[ac+2]);
            IM(t3) = IM(cc[ac+1]) + IM(cc[ac+2]);
            RE(t4) = RE(cc[ac+1]) - RE(cc[ac+2]);
            IM(t4) = IM(cc[ac+1]) - IM(cc[ac+2]);
            RE(t5) = RE(cc[ac]) - RE(cc[ac+3]);
            IM(t5) = IM(cc[ac]) - IM(cc[ac+3]);

            RE(ch[ah]) = RE(cc[ac-1]) + RE(t2) + RE(t3);
            IM(ch[ah]) = IM(cc[ac-1]) + IM(t2) + IM(t3);

            RE(c2) = RE(cc[ac-1]) + MUL_R_C(RE(t2),tr11) + MUL_R_C(RE(t3),tr12);
            IM(c2) = IM(cc[ac-1]) + MUL_R_C(IM(t2),tr11) + MUL_R_C(IM(t3),tr12);
            RE(c3) = RE(cc[ac-1]) + MUL_R_C(RE(t2),tr12) + MUL_R_C(RE(t3),tr11);
            IM(c3) = IM(cc[ac-1]) + MUL_R_C(IM(t2),tr12) + MUL_R_C(IM(t3),tr11);
            RE(c4) = (MUL_R_C(RE(t5),ti12)*isign - MUL_R_C(RE(t4),ti11));
            IM(c4) = (MUL_R_C(IM(t5),ti12)*isign - MUL_R_C(IM(t4),ti11));
            RE(c5) = (MUL_R_C(RE(t5),ti11)*isign + MUL_R_C(RE(t4),ti12));
            IM(c5) = (MUL_R_C(IM(t5),ti11)*isign + MUL_R_C(IM(t4),ti12));

            RE(ch[ah+l1]) = RE(c2) - IM(c5);
            IM(ch[ah+l1]) = IM(c2) + RE(c5);
            RE(ch[ah+2*l1]) = RE(c3) - IM(c4);
            IM(ch[ah+2*l1]) = IM(c3) + RE(c4);
            RE(ch[ah+3*l1]) = RE(c3) + IM(c4);
            IM(ch[ah+3*l1]) = IM(c3) - RE(c4);
            RE(ch[ah+4*l1]) = RE(c2) + IM(c5);
            IM(ch[ah+4*l1]) = IM(c2) - RE(c5);
        }
    } else {
        for (k = 0; k < l1; k++)
        {
            for (i = 0; i < ido; i++)
            {
                ac = i + (k*5 + 1) * ido;
                ah = i + k * ido;

                RE(t2) = RE(cc[ac]) + RE(cc[ac+3*ido]);
                IM(t2) = IM(cc[ac]) + IM(cc[ac+3*ido]);
                RE(t3) = RE(cc[ac+ido]) + RE(cc[ac+2*ido]);
                IM(t3) = IM(cc[ac+ido]) + IM(cc[ac+2*ido]);
                RE(t4) = RE(cc[ac+ido]) - RE(cc[ac+2*ido]);
                IM(t4) = IM(cc[ac+ido]) - IM(cc[ac+2*ido]);
                RE(t5) = RE(cc[ac]) - RE(cc[ac+3*ido]);
                IM(t5) = IM(cc[ac]) - IM(cc[ac+3*ido]);

                RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2) + RE(t3);
                IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2) + IM(t3);

                RE(c2) = RE(cc[ac-ido]) + MUL_R_C(RE(t2),tr11) + MUL_R_C(RE(t3),tr12);
                IM(c2) = IM(cc[ac-ido]) + MUL_R_C(IM(t2),tr11) + MUL_R_C(IM(t3),tr12);
                RE(c3) = RE(cc[ac-ido]) + MUL_R_C(RE(t2),tr12) + MUL_R_C(RE(t3),tr11);
                IM(c3) = IM(cc[ac-ido]) + MUL_R_C(IM(t2),tr12) + MUL_R_C(IM(t3),tr11);
                RE(c4) = (MUL_R_C(RE(t5),ti12)*isign - MUL_R_C(RE(t4),ti11));
                IM(c4) = (MUL_R_C(IM(t5),ti12)*isign - MUL_R_C(IM(t4),ti11));
                RE(c5) = (MUL_R_C(RE(t5),ti11)*isign + MUL_R_C(RE(t4),ti12));
                IM(c5) = (MUL_R_C(IM(t5),ti11)*isign + MUL_R_C(IM(t4),ti12));

                IM(d2) = IM(c2) + RE(c5);
                IM(d3) = IM(c3) + RE(c4);
                RE(d4) = RE(c3) + IM(c4);
                RE(d5) = RE(c2) + IM(c5);
                RE(d2) = RE(c2) - IM(c5);
                IM(d5) = IM(c2) - RE(c5);
                RE(d3) = RE(c3) - IM(c4);
                IM(d4) = IM(c3) - RE(c4);

                RE(ch[ah+l1*ido]) = MUL_R_C(RE(d2),RE(wa1[i])) - MUL_R_C(IM(d2),IM(wa1[i]))*isign;
                IM(ch[ah+l1*ido]) = MUL_R_C(IM(d2),RE(wa1[i])) + MUL_R_C(RE(d2),IM(wa1[i]))*isign;
                RE(ch[ah+2*l1*ido]) = MUL_R_C(RE(d3),RE(wa2[i])) - MUL_R_C(IM(d3),IM(wa2[i]))*isign;
                IM(ch[ah+2*l1*ido]) = MUL_R_C(IM(d3),RE(wa2[i])) + MUL_R_C(RE(d3),IM(wa2[i]))*isign;
                RE(ch[ah+3*l1*ido]) = MUL_R_C(RE(d4),RE(wa3[i])) - MUL_R_C(IM(d4),IM(wa3[i]))*isign;
                IM(ch[ah+3*l1*ido]) = MUL_R_C(IM(d4),RE(wa3[i])) + MUL_R_C(RE(d4),IM(wa3[i]))*isign;
                RE(ch[ah+4*l1*ido]) = MUL_R_C(RE(d5),RE(wa4[i])) - MUL_R_C(IM(d5),IM(wa4[i]))*isign;
                IM(ch[ah+4*l1*ido]) = MUL_R_C(IM(d5),RE(wa4[i])) + MUL_R_C(RE(d5),IM(wa4[i]))*isign;
            }
        }
    }
}


/*----------------------------------------------------------------------
   cfftf1, cfftf, cfftb, cffti1, cffti. Complex FFTs.
  ----------------------------------------------------------------------*/

INLINE void cfftf1(uint16_t n, complex_t *c, complex_t *ch,
                   uint16_t *ifac, complex_t *wa, int8_t isign)
{
    uint16_t i;
    uint16_t k1, l1, l2;
    uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1;

    nf = ifac[1];
    na = 0;
    l1 = 1;
    iw = 0;

    for (k1 = 2; k1 <= nf+1; k1++)
    {
        ip = ifac[k1];
        l2 = ip*l1;
        ido = n / l2;
        idl1 = ido*l1;

        switch (ip)
        {
        case 4:
            ix2 = iw + ido;
            ix3 = ix2 + ido;

            if (na == 0)
                passf4(ido, l1, c, ch, &wa[iw], &wa[ix2], &wa[ix3], isign);
            else
                passf4(ido, l1, ch, c, &wa[iw], &wa[ix2], &wa[ix3], isign);

            na = 1 - na;
            break;
        case 2:
            if (na == 0)
                passf2(ido, l1, c, ch, &wa[iw], isign);
            else
                passf2(ido, l1, ch, c, &wa[iw], isign);

            na = 1 - na;
            break;
        case 3:
            ix2 = iw + ido;

            if (na == 0)
                passf3(ido, l1, c, ch, &wa[iw], &wa[ix2], isign);
            else
                passf3(ido, l1, ch, c, &wa[iw], &wa[ix2], isign);

            na = 1 - na;
            break;
        case 5:
            ix2 = iw + ido;
            ix3 = ix2 + ido;
            ix4 = ix3 + ido;

            if (na == 0)
                passf5(ido, l1, c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign);
            else
                passf5(ido, l1, ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign);

            na = 1 - na;
            break;
        }

        l1 = l2;
        iw += (ip-1) * ido;
    }

    if (na == 0)
        return;

    for (i = 0; i < n; i++)
    {
        RE(c[i]) = RE(ch[i]);
        IM(c[i]) = IM(ch[i]);
    }
}

void cfftf(cfft_info *cfft, complex_t *c)
{
    cfftf1(cfft->n, c, cfft->work, cfft->ifac, cfft->tab, -1);
}

void cfftb(cfft_info *cfft, complex_t *c)
{
    cfftf1(cfft->n, c, cfft->work, cfft->ifac, cfft->tab, +1);
}

static void cffti1(uint16_t n, complex_t *wa, uint16_t *ifac)
{
    static uint16_t ntryh[4] = {3, 4, 2, 5};
#ifndef FIXED_POINT
    real_t arg, argh, argld, fi;
    uint16_t ido, ipm;
    uint16_t i1, k1, l1, l2;
    uint16_t ld, ii, ip;
#endif
    uint16_t ntry, i, j;
    uint16_t ib;
    uint16_t nf, nl, nq, nr;

    nl = n;
    nf = 0;
    j = 0;

startloop:
    j++;

    if (j <= 4)
        ntry = ntryh[j-1];
    else
        ntry += 2;

    do
    {
        nq = nl / ntry;
        nr = nl - ntry*nq;

        if (nr != 0)
            goto startloop;

        nf++;
        ifac[nf+1] = ntry;
        nl = nq;

        if (ntry == 2 && nf != 1)
        {
            for (i = 2; i <= nf; i++)
            {
                ib = nf - i + 2;
                ifac[ib+1] = ifac[ib];
            }
            ifac[2] = 2;
        }
    } while (nl != 1);

    ifac[0] = n;
    ifac[1] = nf;

#ifndef FIXED_POINT
    argh = (real_t)2.0*M_PI / (real_t)n;
    i = 0;
    l1 = 1;

    for (k1 = 1; k1 <= nf; k1++)
    {
        ip = ifac[k1+1];
        ld = 0;
        l2 = l1*ip;
        ido = n / l2;
        ipm = ip - 1;

        for (j = 0; j < ipm; j++)
        {
            i1 = i;
            RE(wa[i]) = 1.0;
            IM(wa[i]) = 0.0;
            ld += l1;
            fi = 0;
            argld = ld*argh;

            for (ii = 0; ii < ido; ii++)
            {
                i++;
                fi++;
                arg = fi * argld;
                RE(wa[i]) = (real_t)cos(arg);
                IM(wa[i]) = (real_t)sin(arg);
            }

            if (ip > 5)
            {
                RE(wa[i1]) = RE(wa[i]);
                IM(wa[i1]) = IM(wa[i]);
            }
        }
        l1 = l2;
    }
#endif
}

cfft_info *cffti(uint16_t n)
{
    cfft_info *cfft = (cfft_info*)malloc(sizeof(cfft_info));

    cfft->n = n;
    cfft->work = (complex_t*)malloc(n*sizeof(complex_t));

#ifndef FIXED_POINT
    cfft->tab = (complex_t*)malloc(n*sizeof(complex_t));

    cffti1(n, cfft->tab, cfft->ifac);
#else
    cffti1(n, NULL, cfft->ifac);

    switch (n)
    {
    case 64: cfft->tab = cfft_tab_64; break;
    case 512: cfft->tab = cfft_tab_512; break;
#ifdef LD_DEC
    case 256: cfft->tab = cfft_tab_256; break;
#endif

#ifdef ALLOW_SMALL_FRAMELENGTH
    case 60: cfft->tab = cfft_tab_60; break;
    case 480: cfft->tab = cfft_tab_480; break;
#ifdef LD_DEC
    case 240: cfft->tab = cfft_tab_240; break;
#endif
#endif
    }
#endif

    return cfft;
}

void cfftu(cfft_info *cfft)
{
    if (cfft->work) free(cfft->work);
#ifndef FIXED_POINT
    if (cfft->tab) free(cfft->tab);
#endif

    if (cfft) free(cfft);
}