shithub: opus

Download patch

ref: 8c7bb4c9c7e2cb529a58e5dcdd8ce324081347c9
parent: 630ee44aaabbf1b8a0f16ca10d5cd481dc15b4e0
author: Timothy Terriberry <[email protected]>
date: Sat Oct 31 06:19:06 EDT 2009

Expose the normalized range for reciprocal square roots in fixed-point mode. This allows subsequnt calculations to use the full precision of the result.

--- a/libcelt/mathops.h
+++ b/libcelt/mathops.h
@@ -106,6 +106,7 @@
 #define celt_sqrt(x) ((float)sqrt(x))
 #define celt_psqrt(x) ((float)sqrt(x))
 #define celt_rsqrt(x) (1.f/celt_sqrt(x))
+#define celt_rsqrt_norm(x) (celt_rsqrt(x))
 #define celt_acos acos
 #define celt_exp exp
 #define celt_cos_norm(x) (cos((.5f*M_PI)*(x)))
@@ -186,17 +187,13 @@
    return x <= 0 ? 0 : celt_ilog2(x);
 }
 
-/** Reciprocal sqrt approximation (Q30 input, Q0 output or equivalent) */
-static inline celt_word32 celt_rsqrt(celt_word32 x)
+/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
+static inline celt_word16 celt_rsqrt_norm(celt_word32 x)
 {
-   int k;
    celt_word16 n;
    celt_word16 r;
    celt_word16 r2;
    celt_word16 y;
-   celt_word32 rt;
-   k = celt_ilog2(x)>>1;
-   x = VSHR32(x, (k-7)<<1);
    /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
    n = x-32768;
    /* Get a rough initial guess for the root.
@@ -210,15 +207,21 @@
       Range of y is [-1564,1594]. */
    r2 = MULT16_16_Q15(r, r);
    y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
-   /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). */
-   rt = ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
-              SUB16(MULT16_16_Q15(y, 12288), 16384))));
-   /* rt is now the Q14 reciprocal square root of the Q16 x, with a maximum
+   /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
+      This yields the Q14 reciprocal square root of the Q16 x, with a maximum
        relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
        peak absolute error of 2.26591/16384. */
-   /* Most of the error in this function comes from the following shift: */
-   rt = PSHR32(rt,k);
-   return rt;
+   return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
+              SUB16(MULT16_16_Q15(y, 12288), 16384))));
+}
+
+/** Reciprocal sqrt approximation (Q30 input, Q0 output or equivalent) */
+static inline celt_word32 celt_rsqrt(celt_word32 x)
+{
+   int k;
+   k = celt_ilog2(x)>>1;
+   x = VSHR32(x, (k-7)<<1);
+   return PSHR32(celt_rsqrt_norm(x), k);
 }
 
 /** Sqrt approximation (QX input, QX/2 output) */
--- a/libcelt/pitch.c
+++ b/libcelt/pitch.c
@@ -215,10 +215,21 @@
       Xr = MULT16_16_16(n, Xr);
       Xi = MULT16_16_16(n, Xi);
 #else
-      n = celt_rsqrt(EPSILON+curve[i]);
-      /* Pre-multiply X by n, so we can keep everything in 16 bits */
-      Xr = EXTRACT16(SHR32(MULT16_16(n, Xr),3));
-      Xi = EXTRACT16(SHR32(MULT16_16(n, Xi),3));
+      {
+         celt_word32 t;
+#ifdef FIXED_POINT
+         int k;
+#endif
+         t = EPSILON+curve[i];
+#ifdef FIXED_POINT
+         k = celt_ilog2(t)>>1;
+#endif
+         t = VSHR32(t, (k-7)<<1);
+         n = celt_rsqrt_norm(t);
+         /* Pre-multiply X by n, so we can keep everything in 16 bits */
+         Xr = EXTRACT16(PSHR32(MULT16_16(n, Xr),3+k));
+         Xi = EXTRACT16(PSHR32(MULT16_16(n, Xi),3+k));
+      }
 #endif
       /* Cross-spectrum between X and conj(Y) */
       *Xptr++ = ADD16(MULT16_16_Q15(Xr, Yptr[0]), MULT16_16_Q15(Xi,Yptr[1]));
--- a/libcelt/vq.c
+++ b/libcelt/vq.c
@@ -103,13 +103,21 @@
 static void normalise_residual(int * restrict iy, celt_norm * restrict X, int N, int K, celt_word32 Ryy)
 {
    int i;
-   celt_word32 g;
+#ifdef FIXED_POINT
+   int k;
+#endif
+   celt_word32 t;
+   celt_word16 g;
 
-   g = celt_rsqrt(Ryy);
+#ifdef FIXED_POINT
+   k = celt_ilog2(Ryy)>>1;
+#endif
+   t = VSHR32(Ryy, (k-7)<<1);
+   g = celt_rsqrt_norm(t);
 
    i=0;
    do
-      X[i] = EXTRACT16(SHR32(MULT16_16(g, iy[i]),1));
+      X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
    while (++i < N);
 }