ref: e2cdf9b98f8ebb814c7b8c23c6f793715719d033
parent: 6db968e9ad1a604fdaa35578df959a8098721a86
author: Ralph Giles <[email protected]>
date: Wed Jun 14 10:37:30 EDT 2017
Rename arm ne10 assembly optimization files. Make these consistent with the other optimization file sets which use a suffix to indicate the extension they use. Signed-off-by: Jonathan Lennox <[email protected]>
--- /dev/null
+++ b/celt/arm/celt_fft_ne10.c
@@ -1,0 +1,173 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+ Written by Viswanath Puttagunta */
+/**
+ @file celt_fft_ne10.c
+ @brief ARM Neon optimizations for fft using NE10 library
+ */
+
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include <NE10_dsp.h>
+#include "os_support.h"
+#include "kiss_fft.h"
+#include "stack_alloc.h"
+
+/* Map the type-generic NE10_FFT_* names onto NE10's float32 or int32 FFT API. */
+#if !defined(FIXED_POINT)
+# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon
+# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
+# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t
+# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32
+# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t
+# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon
+#else
+# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_int32_neon
+# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
+# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t
+# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
+# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t
+# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon
+#endif
+
+#if defined(CUSTOM_MODES)
+
+/* FFT lengths for which the NE10 scaled transform is used. */
+# define NE10_FFTSCALED_SUPPORT_MAX 4
+static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
+   480, 240, 120, 60
+};
+/* Allocate st->arch_fft and, for a listed st->nfft, its NE10 config; 0 on success, -1 on failure. */
+int opus_fft_alloc_arm_neon(kiss_fft_state *st)
+{
+   int idx, found = 0;
+   arch_fft_state *arch;
+
+   arch = (arch_fft_state *)opus_alloc(sizeof(struct arch_fft_state));
+   st->arch_fft = arch;
+   if (arch == NULL)
+      return -1;
+
+   /* Stop scanning as soon as st->nfft matches a table entry. */
+   for (idx = 0; idx < NE10_FFTSCALED_SUPPORT_MAX && !found; idx++)
+      found = (st->nfft == ne10_fft_scaled_support[idx]);
+
+   if (found) {
+      arch->is_supported = 1;
+      arch->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft);
+      if (arch->priv == NULL)
+         return -1;
+   }
+   else {
+      /* This nfft length (scaled fft) is not supported in NE10 */
+      arch->is_supported = 0;
+      arch->priv = NULL;
+   }
+   return 0;
+}
+
+void opus_fft_free_arm_neon(kiss_fft_state *st)
+{
+   /* Destroy the NE10 configuration kept in priv (if any), then free st->arch_fft itself. */
+   if (st->arch_fft != NULL) {
+      NE10_FFT_CFG_TYPE_T ne10_cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv;
+      if (ne10_cfg != NULL) {
+         NE10_FFT_DESTROY_C2C_TYPE(ne10_cfg);
+      }
+      /* The st->arch_fft pointer is left as-is (not reset to NULL) after the free. */
+      opus_free(st->arch_fft);
+   }
+}
+#endif
+
+void opus_fft_neon(const kiss_fft_state *st,
+                   const kiss_fft_cpx *fin,
+                   kiss_fft_cpx *fout)
+{
+   /* Forward FFT of fin into fout: NE10 when st->arch_fft->is_supported, else opus_fft_c(). */
+   NE10_FFT_STATE_TYPE_T local_state;
+   NE10_FFT_CPX_TYPE_T *ne10_in = (NE10_FFT_CPX_TYPE_T *)fin;
+   NE10_FFT_CPX_TYPE_T *ne10_out = (NE10_FFT_CPX_TYPE_T *)fout;
+   VARDECL(NE10_FFT_CPX_TYPE_T, scratch);
+   SAVE_STACK;
+   ALLOC(scratch, st->nfft, NE10_FFT_CPX_TYPE_T);
+
+   if (st->arch_fft->is_supported) {
+      /* Copy the stored NE10 state so its buffer can point at this call's scratch. */
+      memcpy((void *)&local_state, st->arch_fft->priv, sizeof(local_state));
+      local_state.buffer = (NE10_FFT_CPX_TYPE_T *)&scratch[0];
+#if defined(FIXED_POINT)
+      /* int32 entry point: trailing arguments are 0 and 1 for the forward transform. */
+      NE10_FFT_C2C_1D_TYPE_NEON(ne10_out, ne10_in, &local_state, 0, 1);
+#else
+      /* float32 entry point: set is_forward_scaled on the copy, then pass 0. */
+      local_state.is_forward_scaled = 1;
+      NE10_FFT_C2C_1D_TYPE_NEON(ne10_out, ne10_in, &local_state, 0);
+#endif
+   }
+   else {
+      /* This nfft length (scaled fft) not supported in NE10 */
+      opus_fft_c(st, fin, fout);
+   }
+   RESTORE_STACK;
+}
+
+void opus_ifft_neon(const kiss_fft_state *st,
+                    const kiss_fft_cpx *fin,
+                    kiss_fft_cpx *fout)
+{
+   /* Inverse FFT of fin into fout: NE10 when st->arch_fft->is_supported, else opus_ifft_c(). */
+   NE10_FFT_STATE_TYPE_T local_state;
+   NE10_FFT_CPX_TYPE_T *ne10_in = (NE10_FFT_CPX_TYPE_T *)fin;
+   NE10_FFT_CPX_TYPE_T *ne10_out = (NE10_FFT_CPX_TYPE_T *)fout;
+   VARDECL(NE10_FFT_CPX_TYPE_T, scratch);
+   SAVE_STACK;
+   ALLOC(scratch, st->nfft, NE10_FFT_CPX_TYPE_T);
+
+   if (st->arch_fft->is_supported) {
+      /* Copy the stored NE10 state so its buffer can point at this call's scratch. */
+      memcpy((void *)&local_state, st->arch_fft->priv, sizeof(local_state));
+      local_state.buffer = (NE10_FFT_CPX_TYPE_T *)&scratch[0];
+#if defined(FIXED_POINT)
+      /* int32 entry point: trailing arguments are 1 and 0 for the inverse transform. */
+      NE10_FFT_C2C_1D_TYPE_NEON(ne10_out, ne10_in, &local_state, 1, 0);
+#else
+      /* float32 entry point: clear is_backward_scaled on the copy, then pass 1. */
+      local_state.is_backward_scaled = 0;
+      NE10_FFT_C2C_1D_TYPE_NEON(ne10_out, ne10_in, &local_state, 1);
+#endif
+   }
+   else {
+      /* This nfft length (scaled fft) not supported in NE10 */
+      opus_ifft_c(st, fin, fout);
+   }
+   RESTORE_STACK;
+}
--- /dev/null
+++ b/celt/arm/celt_mdct_ne10.c
@@ -1,0 +1,258 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+ Written by Viswanath Puttagunta */
+/**
+ @file celt_mdct_ne10.c
+ @brief ARM Neon optimizations for mdct using NE10 library
+ */
+
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include "kiss_fft.h"
+#include "_kiss_fft_guts.h"
+#include "mdct.h"
+#include "stack_alloc.h"
+
+void clt_mdct_forward_neon(const mdct_lookup *l,
+ kiss_fft_scalar *in,
+ kiss_fft_scalar * OPUS_RESTRICT out,
+ const opus_val16 *window,
+ int overlap, int shift, int stride, int arch)
+{
+ int i;
+ int N, N2, N4;
+ VARDECL(kiss_fft_scalar, f);
+ VARDECL(kiss_fft_cpx, f2);
+ const kiss_fft_state *st = l->kfft[shift];
+ const kiss_twiddle_scalar *trig;
+/* Forward MDCT: window/fold in[] into f, pre-rotate into f2, opus_fft() back into f, post-rotate into out[]. */
+ SAVE_STACK;
+/* Each unit of shift halves N and advances trig by the halved N. */
+ N = l->n;
+ trig = l->trig;
+ for (i=0;i<shift;i++)
+ {
+ N >>= 1;
+ trig += N;
+ }
+ N2 = N>>1;
+ N4 = N>>2;
+/* f: N2 scalars for the folded input (reused below for the FFT output); f2: N4 pre-rotated complex values. */
+ ALLOC(f, N2, kiss_fft_scalar);
+ ALLOC(f2, N4, kiss_fft_cpx);
+
+ /* Consider the input to be composed of four blocks: [a, b, c, d] */
+ /* Window, shuffle, fold */
+ {
+ /* Temp pointers to make it really clear to the compiler what we're doing */
+ const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+ const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+ kiss_fft_scalar * OPUS_RESTRICT yp = f;
+ const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+ const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+ for(i=0;i<((overlap+3)>>2);i++)
+ {
+ /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+ *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
+ *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
+ xp1+=2;
+ xp2-=2;
+ wp1+=2;
+ wp2-=2;
+ }
+ wp1 = window;
+ wp2 = window+overlap-1;
+ for(;i<N4-((overlap+3)>>2);i++)
+ {
+ /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+ *yp++ = *xp2;
+ *yp++ = *xp1;
+ xp1+=2;
+ xp2-=2;
+ }
+ for(;i<N4;i++)
+ {
+ /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+ *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
+ *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
+ xp1+=2;
+ xp2-=2;
+ wp1+=2;
+ wp2-=2;
+ }
+ }
+ /* Pre-rotation */
+ {
+ kiss_fft_scalar * OPUS_RESTRICT yp = f;
+ const kiss_twiddle_scalar *t = &trig[0];
+ for(i=0;i<N4;i++)
+ {
+ kiss_fft_cpx yc;
+ kiss_twiddle_scalar t0, t1;
+ kiss_fft_scalar re, im, yr, yi;
+ t0 = t[i];
+ t1 = t[N4+i];
+ re = *yp++;
+ im = *yp++;
+ yr = S_MUL(re,t0) - S_MUL(im,t1);
+ yi = S_MUL(im,t0) + S_MUL(re,t1);
+ yc.r = yr;
+ yc.i = yi;
+ f2[i] = yc;
+ }
+ }
+/* FFT from f2 into f, which is reinterpreted as an array of kiss_fft_cpx. */
+ opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
+
+ /* Post-rotate */
+ {
+ /* Temp pointers to make it really clear to the compiler what we're doing */
+ const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
+ kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+ kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+ const kiss_twiddle_scalar *t = &trig[0];
+ /* yp1 walks forward from out, yp2 backward from out+stride*(N2-1), both in steps of 2*stride */
+ for(i=0;i<N4;i++)
+ {
+ kiss_fft_scalar yr, yi;
+ yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
+ yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
+ *yp1 = yr;
+ *yp2 = yi;
+ fp++;
+ yp1 += 2*stride;
+ yp2 -= 2*stride;
+ }
+ }
+ RESTORE_STACK;
+}
+
+void clt_mdct_backward_neon(const mdct_lookup *l,
+                            kiss_fft_scalar *in,
+                            kiss_fft_scalar * OPUS_RESTRICT out,
+                            const opus_val16 * OPUS_RESTRICT window,
+                            int overlap, int shift, int stride, int arch)
+{
+   /* Inverse MDCT: pre-rotate in[] into f, opus_ifft() into out[], then post-rotate and window in place. */
+   int i, N, N2, N4;
+   VARDECL(kiss_fft_scalar, f);
+   const kiss_twiddle_scalar *trig;
+   const kiss_fft_state *st = l->kfft[shift];
+   SAVE_STACK; /* counterpart of RESTORE_STACK below, as in clt_mdct_forward_neon */
+
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   ALLOC(f, N2, kiss_fft_scalar);
+
+   /* Pre-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
+         yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
+         yp[2*i] = yr;
+         yp[2*i+1] = yi;
+         xp1+=2*stride;
+         xp2-=2*stride;
+      }
+   }
+
+   opus_ifft(st, (kiss_fft_cpx *)f, (kiss_fft_cpx*)(out+(overlap>>1)), arch);
+
+   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
+      it in-place. */
+   {
+      kiss_fft_scalar * yp0 = out+(overlap>>1);
+      kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the middle pair will be computed twice. */
+      for(i=0;i<(N4+1)>>1;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         kiss_twiddle_scalar t0, t1;
+         re = yp0[0];
+         im = yp0[1];
+         t0 = t[i];
+         t1 = t[N4+i];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t0) + S_MUL(im,t1);
+         yi = S_MUL(re,t1) - S_MUL(im,t0);
+         re = yp1[0];
+         im = yp1[1];
+         yp0[0] = yr;
+         yp1[1] = yi;
+
+         t0 = t[(N4-i-1)];
+         t1 = t[(N2-i-1)];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t0) + S_MUL(im,t1);
+         yi = S_MUL(re,t1) - S_MUL(im,t0);
+         yp1[0] = yr;
+         yp0[1] = yi;
+         yp0 += 2;
+         yp1 -= 2;
+      }
+   }
+
+   /* Mirror on both sides for TDAC */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      const opus_val16 * OPUS_RESTRICT wp1 = window;
+      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+
+      for(i = 0; i < overlap/2; i++)
+      {
+         kiss_fft_scalar x1, x2;
+         x1 = *xp1;
+         x2 = *yp1;
+         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
+         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
+         wp1++;
+         wp2--;
+      }
+   }
+   RESTORE_STACK;
+}
--- a/celt/arm/celt_ne10_fft.c
+++ /dev/null
@@ -1,173 +1,0 @@
-/* Copyright (c) 2015 Xiph.Org Foundation
- Written by Viswanath Puttagunta */
-/**
- @file celt_ne10_fft.c
- @brief ARM Neon optimizations for fft using NE10 library
- */
-
-/*
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- - Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- - Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef SKIP_CONFIG_H
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-#endif
-
-#include <NE10_dsp.h>
-#include "os_support.h"
-#include "kiss_fft.h"
-#include "stack_alloc.h"
-
-#if !defined(FIXED_POINT)
-# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon
-# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
-# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t
-# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32
-# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t
-# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon
-#else
-# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft)
-# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
-# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t
-# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
-# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
-# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t
-# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon
-#endif
-
-#if defined(CUSTOM_MODES)
-
-/* nfft lengths in NE10 that support scaled fft */
-# define NE10_FFTSCALED_SUPPORT_MAX 4
-static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
- 480, 240, 120, 60
-};
-
-int opus_fft_alloc_arm_neon(kiss_fft_state *st)
-{
- int i;
- size_t memneeded = sizeof(struct arch_fft_state);
-
- st->arch_fft = (arch_fft_state *)opus_alloc(memneeded);
- if (!st->arch_fft)
- return -1;
-
- for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) {
- if(st->nfft == ne10_fft_scaled_support[i])
- break;
- }
- if (i == NE10_FFTSCALED_SUPPORT_MAX) {
- /* This nfft length (scaled fft) is not supported in NE10 */
- st->arch_fft->is_supported = 0;
- st->arch_fft->priv = NULL;
- }
- else {
- st->arch_fft->is_supported = 1;
- st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft);
- if (st->arch_fft->priv == NULL) {
- return -1;
- }
- }
- return 0;
-}
-
-void opus_fft_free_arm_neon(kiss_fft_state *st)
-{
- NE10_FFT_CFG_TYPE_T cfg;
-
- if (!st->arch_fft)
- return;
-
- cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv;
- if (cfg)
- NE10_FFT_DESTROY_C2C_TYPE(cfg);
- opus_free(st->arch_fft);
-}
-#endif
-
-void opus_fft_neon(const kiss_fft_state *st,
- const kiss_fft_cpx *fin,
- kiss_fft_cpx *fout)
-{
- NE10_FFT_STATE_TYPE_T state;
- NE10_FFT_CFG_TYPE_T cfg = &state;
- VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
- SAVE_STACK;
- ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
-
- if (!st->arch_fft->is_supported) {
- /* This nfft length (scaled fft) not supported in NE10 */
- opus_fft_c(st, fin, fout);
- }
- else {
- memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
- state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
-#if !defined(FIXED_POINT)
- state.is_forward_scaled = 1;
-
- NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
- (NE10_FFT_CPX_TYPE_T *)fin,
- cfg, 0);
-#else
- NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
- (NE10_FFT_CPX_TYPE_T *)fin,
- cfg, 0, 1);
-#endif
- }
- RESTORE_STACK;
-}
-
-void opus_ifft_neon(const kiss_fft_state *st,
- const kiss_fft_cpx *fin,
- kiss_fft_cpx *fout)
-{
- NE10_FFT_STATE_TYPE_T state;
- NE10_FFT_CFG_TYPE_T cfg = &state;
- VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
- SAVE_STACK;
- ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
-
- if (!st->arch_fft->is_supported) {
- /* This nfft length (scaled fft) not supported in NE10 */
- opus_ifft_c(st, fin, fout);
- }
- else {
- memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
- state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
-#if !defined(FIXED_POINT)
- state.is_backward_scaled = 0;
-
- NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
- (NE10_FFT_CPX_TYPE_T *)fin,
- cfg, 1);
-#else
- NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
- (NE10_FFT_CPX_TYPE_T *)fin,
- cfg, 1, 0);
-#endif
- }
- RESTORE_STACK;
-}
--- a/celt/arm/celt_ne10_mdct.c
+++ /dev/null
@@ -1,258 +1,0 @@
-/* Copyright (c) 2015 Xiph.Org Foundation
- Written by Viswanath Puttagunta */
-/**
- @file celt_ne10_mdct.c
- @brief ARM Neon optimizations for mdct using NE10 library
- */
-
-/*
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- - Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- - Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef SKIP_CONFIG_H
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-#endif
-
-#include "kiss_fft.h"
-#include "_kiss_fft_guts.h"
-#include "mdct.h"
-#include "stack_alloc.h"
-
-void clt_mdct_forward_neon(const mdct_lookup *l,
- kiss_fft_scalar *in,
- kiss_fft_scalar * OPUS_RESTRICT out,
- const opus_val16 *window,
- int overlap, int shift, int stride, int arch)
-{
- int i;
- int N, N2, N4;
- VARDECL(kiss_fft_scalar, f);
- VARDECL(kiss_fft_cpx, f2);
- const kiss_fft_state *st = l->kfft[shift];
- const kiss_twiddle_scalar *trig;
-
- SAVE_STACK;
-
- N = l->n;
- trig = l->trig;
- for (i=0;i<shift;i++)
- {
- N >>= 1;
- trig += N;
- }
- N2 = N>>1;
- N4 = N>>2;
-
- ALLOC(f, N2, kiss_fft_scalar);
- ALLOC(f2, N4, kiss_fft_cpx);
-
- /* Consider the input to be composed of four blocks: [a, b, c, d] */
- /* Window, shuffle, fold */
- {
- /* Temp pointers to make it really clear to the compiler what we're doing */
- const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
- const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
- kiss_fft_scalar * OPUS_RESTRICT yp = f;
- const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
- const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
- for(i=0;i<((overlap+3)>>2);i++)
- {
- /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
- *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
- *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
- xp1+=2;
- xp2-=2;
- wp1+=2;
- wp2-=2;
- }
- wp1 = window;
- wp2 = window+overlap-1;
- for(;i<N4-((overlap+3)>>2);i++)
- {
- /* Real part arranged as a-bR, Imag part arranged as -c-dR */
- *yp++ = *xp2;
- *yp++ = *xp1;
- xp1+=2;
- xp2-=2;
- }
- for(;i<N4;i++)
- {
- /* Real part arranged as a-bR, Imag part arranged as -c-dR */
- *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
- *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
- xp1+=2;
- xp2-=2;
- wp1+=2;
- wp2-=2;
- }
- }
- /* Pre-rotation */
- {
- kiss_fft_scalar * OPUS_RESTRICT yp = f;
- const kiss_twiddle_scalar *t = &trig[0];
- for(i=0;i<N4;i++)
- {
- kiss_fft_cpx yc;
- kiss_twiddle_scalar t0, t1;
- kiss_fft_scalar re, im, yr, yi;
- t0 = t[i];
- t1 = t[N4+i];
- re = *yp++;
- im = *yp++;
- yr = S_MUL(re,t0) - S_MUL(im,t1);
- yi = S_MUL(im,t0) + S_MUL(re,t1);
- yc.r = yr;
- yc.i = yi;
- f2[i] = yc;
- }
- }
-
- opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
-
- /* Post-rotate */
- {
- /* Temp pointers to make it really clear to the compiler what we're doing */
- const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
- kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
- kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
- const kiss_twiddle_scalar *t = &trig[0];
- /* Temp pointers to make it really clear to the compiler what we're doing */
- for(i=0;i<N4;i++)
- {
- kiss_fft_scalar yr, yi;
- yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
- yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
- *yp1 = yr;
- *yp2 = yi;
- fp++;
- yp1 += 2*stride;
- yp2 -= 2*stride;
- }
- }
- RESTORE_STACK;
-}
-
-void clt_mdct_backward_neon(const mdct_lookup *l,
- kiss_fft_scalar *in,
- kiss_fft_scalar * OPUS_RESTRICT out,
- const opus_val16 * OPUS_RESTRICT window,
- int overlap, int shift, int stride, int arch)
-{
- int i;
- int N, N2, N4;
- VARDECL(kiss_fft_scalar, f);
- const kiss_twiddle_scalar *trig;
- const kiss_fft_state *st = l->kfft[shift];
-
- N = l->n;
- trig = l->trig;
- for (i=0;i<shift;i++)
- {
- N >>= 1;
- trig += N;
- }
- N2 = N>>1;
- N4 = N>>2;
-
- ALLOC(f, N2, kiss_fft_scalar);
-
- /* Pre-rotate */
- {
- /* Temp pointers to make it really clear to the compiler what we're doing */
- const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
- const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
- kiss_fft_scalar * OPUS_RESTRICT yp = f;
- const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
- for(i=0;i<N4;i++)
- {
- kiss_fft_scalar yr, yi;
- yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
- yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
- yp[2*i] = yr;
- yp[2*i+1] = yi;
- xp1+=2*stride;
- xp2-=2*stride;
- }
- }
-
- opus_ifft(st, (kiss_fft_cpx *)f, (kiss_fft_cpx*)(out+(overlap>>1)), arch);
-
- /* Post-rotate and de-shuffle from both ends of the buffer at once to make
- it in-place. */
- {
- kiss_fft_scalar * yp0 = out+(overlap>>1);
- kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
- const kiss_twiddle_scalar *t = &trig[0];
- /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
- middle pair will be computed twice. */
- for(i=0;i<(N4+1)>>1;i++)
- {
- kiss_fft_scalar re, im, yr, yi;
- kiss_twiddle_scalar t0, t1;
- re = yp0[0];
- im = yp0[1];
- t0 = t[i];
- t1 = t[N4+i];
- /* We'd scale up by 2 here, but instead it's done when mixing the windows */
- yr = S_MUL(re,t0) + S_MUL(im,t1);
- yi = S_MUL(re,t1) - S_MUL(im,t0);
- re = yp1[0];
- im = yp1[1];
- yp0[0] = yr;
- yp1[1] = yi;
-
- t0 = t[(N4-i-1)];
- t1 = t[(N2-i-1)];
- /* We'd scale up by 2 here, but instead it's done when mixing the windows */
- yr = S_MUL(re,t0) + S_MUL(im,t1);
- yi = S_MUL(re,t1) - S_MUL(im,t0);
- yp1[0] = yr;
- yp0[1] = yi;
- yp0 += 2;
- yp1 -= 2;
- }
- }
-
- /* Mirror on both sides for TDAC */
- {
- kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
- kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
- const opus_val16 * OPUS_RESTRICT wp1 = window;
- const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
-
- for(i = 0; i < overlap/2; i++)
- {
- kiss_fft_scalar x1, x2;
- x1 = *xp1;
- x2 = *yp1;
- *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
- *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
- wp1++;
- wp2--;
- }
- }
- RESTORE_STACK;
-}
--- a/celt_sources.mk
+++ b/celt_sources.mk
@@ -45,5 +45,5 @@
celt/arm/pitch_neon_intr.c
CELT_SOURCES_ARM_NE10 = \
-celt/arm/celt_ne10_fft.c \
-celt/arm/celt_ne10_mdct.c
+celt/arm/celt_fft_ne10.c \
+celt/arm/celt_mdct_ne10.c