ref: 48923ae9966c4c065c7cea776944155ca0617f34
parent: 802f6eab9b75db906869262a6387cf5209b28bb4
author: Jean-Marc Valin <[email protected]>
date: Fri Jul 23 13:28:50 EDT 2010
Cleanup, de-inlining some math functions
--- a/libcelt/Makefile.am
+++ b/libcelt/Makefile.am
@@ -15,7 +15,7 @@
# Sources for compilation in the library
libcelt@LIBCELT_SUFFIX@_la_SOURCES = bands.c celt.c cwrs.c ecintrin.h entcode.c \
- entdec.c entenc.c header.c kiss_fft.c laplace.c mdct.c \
+ entdec.c entenc.c header.c kiss_fft.c laplace.c mathops.c mdct.c \
modes.c pitch.c plc.c quant_bands.c rangedec.c rangeenc.c rate.c \
vq.c
--- a/libcelt/bands.c
+++ b/libcelt/bands.c
@@ -45,6 +45,22 @@
#include "mathops.h"
#include "rate.h"
+/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
+ with this approximation is important because it has an impact on the bit allocation */
+static celt_int16 bitexact_cos(celt_int16 x)
+{
+ celt_int32 tmp;
+ celt_int16 x2;
+ tmp = (4096+((celt_int32)(x)*(x)))>>13;
+ if (tmp > 32767)
+ tmp = 32767;
+ x2 = tmp;
+ x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
+ if (x2 > 32766)
+ x2 = 32766;
+ return 1+x2;
+}
+
#ifdef FIXED_POINT
/* Compute the amplitude (sqrt energy) in each of the bands */
--- /dev/null
+++ b/libcelt/mathops.c
@@ -1,0 +1,179 @@
+/* Copyright (c) 2002-2008 Jean-Marc Valin
+ Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/**
+ @file mathops.c
+ @brief Various math functions
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mathops.h"
+
+#ifdef FIXED_POINT
+
+celt_word32 frac_div32(celt_word32 a, celt_word32 b)
+{
+ celt_word16 rcp;
+ celt_word32 result, rem;
+ int shift = 30-celt_ilog2(b);
+ a = SHL32(a,shift);
+ b = SHL32(b,shift);
+
+ /* 16-bit reciprocal */
+ rcp = ROUND16(celt_rcp(ROUND16(b,16)),2);
+ result = SHL32(MULT16_32_Q15(rcp, a),1);
+ rem = a-MULT32_32_Q31(result, b);
+ result += SHL32(MULT16_32_Q15(rcp, rem),1);
+ return result;
+}
+
+/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
+celt_word16 celt_rsqrt_norm(celt_word32 x)
+{
+ celt_word16 n;
+ celt_word16 r;
+ celt_word16 r2;
+ celt_word16 y;
+ /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
+ n = x-32768;
+ /* Get a rough initial guess for the root.
+ The optimal minimax quadratic approximation (using relative error) is
+ r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
+ Coefficients here, and the final result r, are Q14.*/
+ r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
+ /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
+ We can compute the result from n and r using Q15 multiplies with some
+ adjustment, carefully done to avoid overflow.
+ Range of y is [-1564,1594]. */
+ r2 = MULT16_16_Q15(r, r);
+ y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
+ /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
+ This yields the Q14 reciprocal square root of the Q16 x, with a maximum
+ relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
+ peak absolute error of 2.26591/16384. */
+ return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
+ SUB16(MULT16_16_Q15(y, 12288), 16384))));
+}
+
+/** Sqrt approximation (QX input, QX/2 output) */
+celt_word32 celt_sqrt(celt_word32 x)
+{
+ int k;
+ celt_word16 n;
+ celt_word32 rt;
+ static const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664};
+ if (x==0)
+ return 0;
+ k = (celt_ilog2(x)>>1)-7;
+ x = VSHR32(x, (k<<1));
+ n = x-32768;
+ rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
+ MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
+ rt = VSHR32(rt,7-k);
+ return rt;
+}
+
+#define L1 32767
+#define L2 -7651
+#define L3 8277
+#define L4 -626
+
+static inline celt_word16 _celt_cos_pi_2(celt_word16 x)
+{
+ celt_word16 x2;
+
+ x2 = MULT16_16_P15(x,x);
+ return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
+ ))))))));
+}
+
+#undef L1
+#undef L2
+#undef L3
+#undef L4
+
+celt_word16 celt_cos_norm(celt_word32 x)
+{
+ x = x&0x0001ffff;
+ if (x>SHL32(EXTEND32(1), 16))
+ x = SUB32(SHL32(EXTEND32(1), 17),x);
+ if (x&0x00007fff)
+ {
+ if (x<SHL32(EXTEND32(1), 15))
+ {
+ return _celt_cos_pi_2(EXTRACT16(x));
+ } else {
+ return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
+ }
+ } else {
+ if (x&0x0000ffff)
+ return 0;
+ else if (x&0x0001ffff)
+ return -32767;
+ else
+ return 32767;
+ }
+}
+
+/** Reciprocal approximation (Q15 input, Q16 output) */
+celt_word32 celt_rcp(celt_word32 x)
+{
+ int i;
+ celt_word16 n;
+ celt_word16 r;
+ celt_assert2(x>0, "celt_rcp() only defined for positive values");
+ i = celt_ilog2(x);
+ /* n is Q15 with range [0,1). */
+ n = VSHR32(x,i-15)-32768;
+ /* Start with a linear approximation:
+ r = 1.8823529411764706-0.9411764705882353*n.
+ The coefficients and the result are Q14 in the range [15420,30840].*/
+ r = ADD16(30840, MULT16_16_Q15(-15420, n));
+ /* Perform two Newton iterations:
+ r -= r*((r*(n+1.Q15))-1.Q15), which expands to
+ r -= r*((r*n)+(r-1.Q15)). */
+ r = SUB16(r, MULT16_16_Q15(r,
+ ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
+ /* We subtract an extra 1 in the second iteration to avoid overflow; it also
+ neatly compensates for truncation error in the rest of the process. */
+ r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
+ ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
+ /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
+ of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
+ error of 1.24665/32768. */
+ return VSHR32(EXTEND32(r),i-16);
+}
+
+#endif
--- a/libcelt/mathops.h
+++ b/libcelt/mathops.h
@@ -42,66 +42,13 @@
#include "entcode.h"
#include "os_support.h"
-
-
-#ifndef OVERRIDE_FIND_MAX16
-static inline int find_max16(celt_word16 *x, int len)
-{
- celt_word16 max_corr=-VERY_LARGE16;
- int i, id = 0;
- for (i=0;i<len;i++)
- {
- if (x[i] > max_corr)
- {
- id = i;
- max_corr = x[i];
- }
- }
- return id;
-}
-#endif
-
-#ifndef OVERRIDE_FIND_MAX32
-static inline int find_max32(celt_word32 *x, int len)
-{
- celt_word32 max_corr=-VERY_LARGE32;
- int i, id = 0;
- for (i=0;i<len;i++)
- {
- if (x[i] > max_corr)
- {
- id = i;
- max_corr = x[i];
- }
- }
- return id;
-}
-#endif
-
/* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
#define FRAC_MUL16(a,b) ((16384+((celt_int32)(celt_int16)(a)*(celt_int16)(b)))>>15)
-/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
- with this approximation is important because it has an impact on the bit allocation */
-static inline celt_int16 bitexact_cos(celt_int16 x)
-{
- celt_int32 tmp;
- celt_int16 x2;
- tmp = (4096+((celt_int32)(x)*(x)))>>13;
- if (tmp > 32767)
- tmp = 32767;
- x2 = tmp;
- x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
- if (x2 > 32766)
- x2 = 32766;
- return 1+x2;
-}
-
#ifndef FIXED_POINT
#define celt_sqrt(x) ((float)sqrt(x))
-#define celt_psqrt(x) ((float)sqrt(x))
#define celt_rsqrt(x) (1.f/celt_sqrt(x))
#define celt_rsqrt_norm(x) (celt_rsqrt(x))
#define celt_acos acos
@@ -195,120 +142,13 @@
return x <= 0 ? 0 : celt_ilog2(x);
}
-/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
-static inline celt_word16 celt_rsqrt_norm(celt_word32 x)
-{
- celt_word16 n;
- celt_word16 r;
- celt_word16 r2;
- celt_word16 y;
- /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
- n = x-32768;
- /* Get a rough initial guess for the root.
- The optimal minimax quadratic approximation (using relative error) is
- r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
- Coefficients here, and the final result r, are Q14.*/
- r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
- /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
- We can compute the result from n and r using Q15 multiplies with some
- adjustment, carefully done to avoid overflow.
- Range of y is [-1564,1594]. */
- r2 = MULT16_16_Q15(r, r);
- y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
- /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
- This yields the Q14 reciprocal square root of the Q16 x, with a maximum
- relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
- peak absolute error of 2.26591/16384. */
- return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
- SUB16(MULT16_16_Q15(y, 12288), 16384))));
-}
+celt_word16 celt_rsqrt_norm(celt_word32 x);
-/** Reciprocal sqrt approximation (Q30 input, Q0 output or equivalent) */
-static inline celt_word32 celt_rsqrt(celt_word32 x)
-{
- int k;
- k = celt_ilog2(x)>>1;
- x = VSHR32(x, (k-7)<<1);
- return PSHR32(celt_rsqrt_norm(x), k);
-}
+celt_word32 celt_sqrt(celt_word32 x);
-/** Sqrt approximation (QX input, QX/2 output) */
-static inline celt_word32 celt_sqrt(celt_word32 x)
-{
- int k;
- celt_word16 n;
- celt_word32 rt;
- static const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664};
- if (x==0)
- return 0;
- k = (celt_ilog2(x)>>1)-7;
- x = VSHR32(x, (k<<1));
- n = x-32768;
- rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
- MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
- rt = VSHR32(rt,7-k);
- return rt;
-}
+celt_word16 celt_cos_norm(celt_word32 x);
-/** Sqrt approximation (QX input, QX/2 output) that assumes that the input is
- strictly positive */
-static inline celt_word32 celt_psqrt(celt_word32 x)
-{
- int k;
- celt_word16 n;
- celt_word32 rt;
- static const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664};
- k = (celt_ilog2(x)>>1)-7;
- x = VSHR32(x, (k<<1));
- n = x-32768;
- rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
- MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
- rt = VSHR32(rt,7-k);
- return rt;
-}
-#define L1 32767
-#define L2 -7651
-#define L3 8277
-#define L4 -626
-
-static inline celt_word16 _celt_cos_pi_2(celt_word16 x)
-{
- celt_word16 x2;
-
- x2 = MULT16_16_P15(x,x);
- return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
- ))))))));
-}
-
-#undef L1
-#undef L2
-#undef L3
-#undef L4
-
-static inline celt_word16 celt_cos_norm(celt_word32 x)
-{
- x = x&0x0001ffff;
- if (x>SHL32(EXTEND32(1), 16))
- x = SUB32(SHL32(EXTEND32(1), 17),x);
- if (x&0x00007fff)
- {
- if (x<SHL32(EXTEND32(1), 15))
- {
- return _celt_cos_pi_2(EXTRACT16(x));
- } else {
- return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
- }
- } else {
- if (x&0x0000ffff)
- return 0;
- else if (x&0x0001ffff)
- return -32767;
- else
- return 32767;
- }
-}
-
static inline celt_word16 celt_log2(celt_word32 x)
{
int i;
@@ -349,52 +189,11 @@
return VSHR32(EXTEND32(frac), -integer-2);
}
-/** Reciprocal approximation (Q15 input, Q16 output) */
-static inline celt_word32 celt_rcp(celt_word32 x)
-{
- int i;
- celt_word16 n;
- celt_word16 r;
- celt_assert2(x>0, "celt_rcp() only defined for positive values");
- i = celt_ilog2(x);
- /* n is Q15 with range [0,1). */
- n = VSHR32(x,i-15)-32768;
- /* Start with a linear approximation:
- r = 1.8823529411764706-0.9411764705882353*n.
- The coefficients and the result are Q14 in the range [15420,30840].*/
- r = ADD16(30840, MULT16_16_Q15(-15420, n));
- /* Perform two Newton iterations:
- r -= r*((r*n)-1.Q15)
- = r*((r*n)+(r-1.Q15)). */
- r = SUB16(r, MULT16_16_Q15(r,
- ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
- /* We subtract an extra 1 in the second iteration to avoid overflow; it also
- neatly compensates for truncation error in the rest of the process. */
- r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
- ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
- /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
- of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
- error of 1.24665/32768. */
- return VSHR32(EXTEND32(r),i-16);
-}
+celt_word32 celt_rcp(celt_word32 x);
#define celt_div(a,b) MULT32_32_Q31((celt_word32)(a),celt_rcp(b))
-static inline celt_word32 frac_div32(celt_word32 a, celt_word32 b)
-{
- celt_word16 rcp;
- celt_word32 result, rem;
- int shift = 30-celt_ilog2(b);
- a = SHL32(a,shift);
- b = SHL32(b,shift);
-
- /* 16-bit reciprocal */
- rcp = ROUND16(celt_rcp(ROUND16(b,16)),2);
- result = SHL32(MULT16_32_Q15(rcp, a),1);
- rem = a-MULT32_32_Q31(result, b);
- result += SHL32(MULT16_32_Q15(rcp, rem),1);
- return result;
-}
+celt_word32 frac_div32(celt_word32 a, celt_word32 b);
#define M1 32767
#define M2 -21
--- a/tests/dft-test.c
+++ b/tests/dft-test.c
@@ -8,6 +8,7 @@
#define CELT_C
#include "../libcelt/stack_alloc.h"
#include "../libcelt/kiss_fft.c"
+#include "../libcelt/mathops.c"
#include "../libcelt/entcode.c"
--- a/tests/mathops-test.c
+++ b/tests/mathops-test.c
@@ -2,7 +2,7 @@
#include "config.h"
#endif
-#include "mathops.h"
+#include "mathops.c"
#include <stdio.h>
#include <math.h>
@@ -56,24 +56,6 @@
}
}
-void testrsqrt(void)
-{
- celt_int32 i;
- for (i=1;i<=2000000;i++)
- {
- double ratio;
- celt_word16 val;
- val = celt_rsqrt(i);
- ratio = val*sqrt(i)/Q15ONE;
- if (fabs(ratio - 1) > .05)
- {
- fprintf (stderr, "rsqrt failed: rsqrt(%d)="WORD" (ratio = %f)\n", i, val, ratio);
- ret = 1;
- }
- i+= i>>10;
- }
-}
-
#ifndef FIXED_POINT
void testlog2(void)
{
@@ -179,7 +161,6 @@
{
testdiv();
testsqrt();
- testrsqrt();
testlog2();
testexp2();
testexp2log2();
--- a/tests/mdct-test.c
+++ b/tests/mdct-test.c
@@ -9,6 +9,7 @@
#include "../libcelt/kiss_fft.c"
#include "../libcelt/mdct.c"
+#include "../libcelt/mathops.c"
#ifndef M_PI
#define M_PI 3.141592653