shithub: opus

--- a/libcelt/Makefile.am

+++ b/libcelt/Makefile.am

@@ -15,7 +15,7 @@

 # Sources for compilation in the library

 libcelt@LIBCELT_SUFFIX@_la_SOURCES = bands.c celt.c cwrs.c ecintrin.h entcode.c \

-	entdec.c entenc.c header.c kiss_fft.c laplace.c mdct.c \

+	entdec.c entenc.c header.c kiss_fft.c laplace.c mathops.c mdct.c \

 	modes.c pitch.c plc.c quant_bands.c rangedec.c rangeenc.c rate.c \

 	vq.c

--- a/libcelt/bands.c

+++ b/libcelt/bands.c

@@ -45,6 +45,22 @@

 #include "mathops.h"

 #include "rate.h"

+/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness

+   with this approximation is important because it has an impact on the bit allocation */

+static celt_int16 bitexact_cos(celt_int16 x)

+{

+   celt_int32 tmp;

+   celt_int16 x2;

+   tmp = (4096+((celt_int32)(x)*(x)))>>13;

+   if (tmp > 32767)

+      tmp = 32767;

+   x2 = tmp;

+   x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));

+   if (x2 > 32766)

+      x2 = 32766;

+   return 1+x2;

+}

 #ifdef FIXED_POINT

 /* Compute the amplitude (sqrt energy) in each of the bands */

--- /dev/null

+++ b/libcelt/mathops.c

@@ -1,0 +1,179 @@

+/* Copyright (c) 2002-2008 Jean-Marc Valin

+   Copyright (c) 2007-2008 CSIRO

+   Copyright (c) 2007-2009 Xiph.Org Foundation

+   Written by Jean-Marc Valin */

+/**

+   @file mathops.h

+   @brief Various math functions

+*/

+/*

+   Redistribution and use in source and binary forms, with or without

+   modification, are permitted provided that the following conditions

+   are met:

+   - Redistributions of source code must retain the above copyright

+   notice, this list of conditions and the following disclaimer.

+   - Redistributions in binary form must reproduce the above copyright

+   notice, this list of conditions and the following disclaimer in the

+   documentation and/or other materials provided with the distribution.

+   - Neither the name of the Xiph.org Foundation nor the names of its

+   contributors may be used to endorse or promote products derived from

+   this software without specific prior written permission.

+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR

+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+*/

+#ifdef HAVE_CONFIG_H

+#include "config.h"

+#endif

+#include "mathops.h"

+#ifdef FIXED_POINT

+celt_word32 frac_div32(celt_word32 a, celt_word32 b)

+{

+   celt_word16 rcp;

+   celt_word32 result, rem;

+   int shift = 30-celt_ilog2(b);

+   a = SHL32(a,shift);

+   b = SHL32(b,shift);

+   /* 16-bit reciprocal */

+   rcp = ROUND16(celt_rcp(ROUND16(b,16)),2);

+   result = SHL32(MULT16_32_Q15(rcp, a),1);

+   rem = a-MULT32_32_Q31(result, b);

+   result += SHL32(MULT16_32_Q15(rcp, rem),1);

+   return result;

+}

+/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */

+celt_word16 celt_rsqrt_norm(celt_word32 x)

+{

+   celt_word16 n;

+   celt_word16 r;

+   celt_word16 r2;

+   celt_word16 y;

+   /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */

+   n = x-32768;

+   /* Get a rough initial guess for the root.

+      The optimal minimax quadratic approximation (using relative error) is

+       r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).

+      Coefficients here, and the final result r, are Q14.*/

+   r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));

+   /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.

+      We can compute the result from n and r using Q15 multiplies with some

+       adjustment, carefully done to avoid overflow.

+      Range of y is [-1564,1594]. */

+   r2 = MULT16_16_Q15(r, r);

+   y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);

+   /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).

+      This yields the Q14 reciprocal square root of the Q16 x, with a maximum

+       relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a

+       peak absolute error of 2.26591/16384. */

+   return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,

+              SUB16(MULT16_16_Q15(y, 12288), 16384))));

+}

+/** Sqrt approximation (QX input, QX/2 output) */

+celt_word32 celt_sqrt(celt_word32 x)

+{

+   int k;

+   celt_word16 n;

+   celt_word32 rt;

+   static const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664};

+   if (x==0)

+      return 0;

+   k = (celt_ilog2(x)>>1)-7;

+   x = VSHR32(x, (k<<1));

+   n = x-32768;

+   rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],

+              MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));

+   rt = VSHR32(rt,7-k);

+   return rt;

+}

+#define L1 32767

+#define L2 -7651

+#define L3 8277

+#define L4 -626

+static inline celt_word16 _celt_cos_pi_2(celt_word16 x)

+{

+   celt_word16 x2;

+   x2 = MULT16_16_P15(x,x);

+   return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2

+                                                                                ))))))));

+}

+#undef L1

+#undef L2

+#undef L3

+#undef L4

+celt_word16 celt_cos_norm(celt_word32 x)

+{

+   x = x&0x0001ffff;

+   if (x>SHL32(EXTEND32(1), 16))

+      x = SUB32(SHL32(EXTEND32(1), 17),x);

+   if (x&0x00007fff)

+   {

+      if (x<SHL32(EXTEND32(1), 15))

+      {

+         return _celt_cos_pi_2(EXTRACT16(x));

+      } else {

+         return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));

+      }

+   } else {

+      if (x&0x0000ffff)

+         return 0;

+      else if (x&0x0001ffff)

+         return -32767;

+      else

+         return 32767;

+   }

+}

+/** Reciprocal approximation (Q15 input, Q16 output) */

+celt_word32 celt_rcp(celt_word32 x)

+{

+   int i;

+   celt_word16 n;

+   celt_word16 r;

+   celt_assert2(x>0, "celt_rcp() only defined for positive values");

+   i = celt_ilog2(x);

+   /* n is Q15 with range [0,1). */

+   n = VSHR32(x,i-15)-32768;

+   /* Start with a linear approximation:

+      r = 1.8823529411764706-0.9411764705882353*n.

+      The coefficients and the result are Q14 in the range [15420,30840].*/

+   r = ADD16(30840, MULT16_16_Q15(-15420, n));

+   /* Perform two Newton iterations:

+      r -= r*((r*n)-1.Q15)

+         = r*((r*n)+(r-1.Q15)). */

+   r = SUB16(r, MULT16_16_Q15(r,

+             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));

+   /* We subtract an extra 1 in the second iteration to avoid overflow; it also

+       neatly compensates for truncation error in the rest of the process. */

+   r = SUB16(r, ADD16(1, MULT16_16_Q15(r,

+             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));

+   /* r is now the Q15 solution to 2/(n+1), with a maximum relative error

+       of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute

+       error of 1.24665/32768. */

+   return VSHR32(EXTEND32(r),i-16);

+}

+#endif

--- a/libcelt/mathops.h

+++ b/libcelt/mathops.h

@@ -42,66 +42,13 @@

 #include "entcode.h"

 #include "os_support.h"

-#ifndef OVERRIDE_FIND_MAX16

-static inline int find_max16(celt_word16 *x, int len)

-{

-   celt_word16 max_corr=-VERY_LARGE16;

-   int i, id = 0;

-   for (i=0;i<len;i++)

-   {

-      if (x[i] > max_corr)

-      {

-         id = i;

-         max_corr = x[i];

-      }

-   }

-   return id;

-}

-#endif

-#ifndef OVERRIDE_FIND_MAX32

-static inline int find_max32(celt_word32 *x, int len)

-{

-   celt_word32 max_corr=-VERY_LARGE32;

-   int i, id = 0;

-   for (i=0;i<len;i++)

-   {

-      if (x[i] > max_corr)

-      {

-         id = i;

-         max_corr = x[i];

-      }

-   }

-   return id;

-}

-#endif

 /* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */

 #define FRAC_MUL16(a,b) ((16384+((celt_int32)(celt_int16)(a)*(celt_int16)(b)))>>15)

-/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness

-   with this approximation is important because it has an impact on the bit allocation */

-static inline celt_int16 bitexact_cos(celt_int16 x)

-{

-   celt_int32 tmp;

-   celt_int16 x2;

-   tmp = (4096+((celt_int32)(x)*(x)))>>13;

-   if (tmp > 32767)

-      tmp = 32767;

-   x2 = tmp;

-   x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));

-   if (x2 > 32766)

-      x2 = 32766;

-   return 1+x2;

-}

 #ifndef FIXED_POINT

 #define celt_sqrt(x) ((float)sqrt(x))

-#define celt_psqrt(x) ((float)sqrt(x))

 #define celt_rsqrt(x) (1.f/celt_sqrt(x))

 #define celt_rsqrt_norm(x) (celt_rsqrt(x))

 #define celt_acos acos

@@ -195,120 +142,13 @@

    return x <= 0 ? 0 : celt_ilog2(x);

-/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */

-static inline celt_word16 celt_rsqrt_norm(celt_word32 x)

-{

-   celt_word16 n;

-   celt_word16 r;

-   celt_word16 r2;

-   celt_word16 y;

-   /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */

-   n = x-32768;

-   /* Get a rough initial guess for the root.

-      The optimal minimax quadratic approximation (using relative error) is

-       r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).

-      Coefficients here, and the final result r, are Q14.*/

-   r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));

-   /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.

-      We can compute the result from n and r using Q15 multiplies with some

-       adjustment, carefully done to avoid overflow.

-      Range of y is [-1564,1594]. */

-   r2 = MULT16_16_Q15(r, r);

-   y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);

-   /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).

-      This yields the Q14 reciprocal square root of the Q16 x, with a maximum

-       relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a

-       peak absolute error of 2.26591/16384. */

-   return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,

-              SUB16(MULT16_16_Q15(y, 12288), 16384))));

-}

+celt_word16 celt_rsqrt_norm(celt_word32 x);

-/** Reciprocal sqrt approximation (Q30 input, Q0 output or equivalent) */

-static inline celt_word32 celt_rsqrt(celt_word32 x)

-{

-   int k;

-   k = celt_ilog2(x)>>1;

-   x = VSHR32(x, (k-7)<<1);

-   return PSHR32(celt_rsqrt_norm(x), k);

-}

+celt_word32 celt_sqrt(celt_word32 x);

-/** Sqrt approximation (QX input, QX/2 output) */

-static inline celt_word32 celt_sqrt(celt_word32 x)

-{

-   int k;

-   celt_word16 n;

-   celt_word32 rt;

-   static const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664};

-   if (x==0)

-      return 0;

-   k = (celt_ilog2(x)>>1)-7;

-   x = VSHR32(x, (k<<1));

-   n = x-32768;

-   rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],

-              MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));

-   rt = VSHR32(rt,7-k);

-   return rt;

-}

+celt_word16 celt_cos_norm(celt_word32 x);

-/** Sqrt approximation (QX input, QX/2 output) that assumes that the input is

-    strictly positive */

-static inline celt_word32 celt_psqrt(celt_word32 x)

-{

-   int k;

-   celt_word16 n;

-   celt_word32 rt;

-   static const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664};

-   k = (celt_ilog2(x)>>1)-7;

-   x = VSHR32(x, (k<<1));

-   n = x-32768;

-   rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],

-              MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));

-   rt = VSHR32(rt,7-k);

-   return rt;

-}

-#define L1 32767

-#define L2 -7651

-#define L3 8277

-#define L4 -626

-static inline celt_word16 _celt_cos_pi_2(celt_word16 x)

-{

-   celt_word16 x2;

-   x2 = MULT16_16_P15(x,x);

-   return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2

-                                                                                ))))))));

-}

-#undef L1

-#undef L2

-#undef L3

-#undef L4

-static inline celt_word16 celt_cos_norm(celt_word32 x)

-{

-   x = x&0x0001ffff;

-   if (x>SHL32(EXTEND32(1), 16))

-      x = SUB32(SHL32(EXTEND32(1), 17),x);

-   if (x&0x00007fff)

-   {

-      if (x<SHL32(EXTEND32(1), 15))

-      {

-         return _celt_cos_pi_2(EXTRACT16(x));

-      } else {

-         return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));

-      }

-   } else {

-      if (x&0x0000ffff)

-         return 0;

-      else if (x&0x0001ffff)

-         return -32767;

-      else

-         return 32767;

-   }

-}

 static inline celt_word16 celt_log2(celt_word32 x)

    int i;

@@ -349,52 +189,11 @@

    return VSHR32(EXTEND32(frac), -integer-2);

-/** Reciprocal approximation (Q15 input, Q16 output) */

-static inline celt_word32 celt_rcp(celt_word32 x)

-{

-   int i;

-   celt_word16 n;

-   celt_word16 r;

-   celt_assert2(x>0, "celt_rcp() only defined for positive values");

-   i = celt_ilog2(x);

-   /* n is Q15 with range [0,1). */

-   n = VSHR32(x,i-15)-32768;

-   /* Start with a linear approximation:

-      r = 1.8823529411764706-0.9411764705882353*n.

-      The coefficients and the result are Q14 in the range [15420,30840].*/

-   r = ADD16(30840, MULT16_16_Q15(-15420, n));

-   /* Perform two Newton iterations:

-      r -= r*((r*n)-1.Q15)

-         = r*((r*n)+(r-1.Q15)). */

-   r = SUB16(r, MULT16_16_Q15(r,

-             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));

-   /* We subtract an extra 1 in the second iteration to avoid overflow; it also

-       neatly compensates for truncation error in the rest of the process. */

-   r = SUB16(r, ADD16(1, MULT16_16_Q15(r,

-             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));

-   /* r is now the Q15 solution to 2/(n+1), with a maximum relative error

-       of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute

-       error of 1.24665/32768. */

-   return VSHR32(EXTEND32(r),i-16);

-}

+celt_word32 celt_rcp(celt_word32 x);

 #define celt_div(a,b) MULT32_32_Q31((celt_word32)(a),celt_rcp(b))

-static inline celt_word32 frac_div32(celt_word32 a, celt_word32 b)

-{

-   celt_word16 rcp;

-   celt_word32 result, rem;

-   int shift = 30-celt_ilog2(b);

-   a = SHL32(a,shift);

-   b = SHL32(b,shift);

-   /* 16-bit reciprocal */

-   rcp = ROUND16(celt_rcp(ROUND16(b,16)),2);

-   result = SHL32(MULT16_32_Q15(rcp, a),1);

-   rem = a-MULT32_32_Q31(result, b);

-   result += SHL32(MULT16_32_Q15(rcp, rem),1);

-   return result;

-}

+celt_word32 frac_div32(celt_word32 a, celt_word32 b);

 #define M1 32767

 #define M2 -21

--- a/tests/dft-test.c

+++ b/tests/dft-test.c

@@ -8,6 +8,7 @@

 #define CELT_C

 #include "../libcelt/stack_alloc.h"

 #include "../libcelt/kiss_fft.c"

+#include "../libcelt/mathops.c"

 #include "../libcelt/entcode.c"

--- a/tests/mathops-test.c

+++ b/tests/mathops-test.c

@@ -2,7 +2,7 @@

 #include "config.h"

 #endif

-#include "mathops.h"

+#include "mathops.c"

 #include <stdio.h>

 #include <math.h>

@@ -56,24 +56,6 @@

-void testrsqrt(void)

-{

-   celt_int32 i;

-   for (i=1;i<=2000000;i++)

-   {

-      double ratio;

-      celt_word16 val;

-      val = celt_rsqrt(i);

-      ratio = val*sqrt(i)/Q15ONE;

-      if (fabs(ratio - 1) > .05)

-      {

-         fprintf (stderr, "rsqrt failed: rsqrt(%d)="WORD" (ratio = %f)\n", i, val, ratio);

-         ret = 1;

-      }

-      i+= i>>10;

-   }

-}

 #ifndef FIXED_POINT

 void testlog2(void)

@@ -179,7 +161,6 @@

    testdiv();

    testsqrt();

-   testrsqrt();

    testlog2();

    testexp2();

    testexp2log2();

--- a/tests/mdct-test.c

+++ b/tests/mdct-test.c

@@ -9,6 +9,7 @@

 #include "../libcelt/kiss_fft.c"

 #include "../libcelt/mdct.c"

+#include "../libcelt/mathops.c"

 #ifndef M_PI

 #define M_PI 3.141592653