ref: a9ffc14ab7f00496b41f84a89c395b839e8452c4
parent: ab4dcc5c90140f7a6fefdef2e2acfdf7ecc152c9
author: Timothy B. Terriberry <[email protected]>
date: Tue Oct 20 20:18:41 EDT 2009
Enhancements the fixed-point approximations of non-linear functions. Accuracy for rsqrt, rcp, cos, and log2 is now at the level of truncation error for the current output resolution of these functions. sqrt and exp2 still have non-trivial algebraic error, but this cannot be reduced much further using the current method without additional computation. Also updates the fast float approximations for log2 and exp2 with coefficients that give slightly lower maximum relative error. Patch modified by Jean-Marc Valin to leave the cos approximation as is and leave the check for x<-15 in exp2 as is.
--- a/libcelt/mathops.h
+++ b/libcelt/mathops.h
@@ -130,9 +130,9 @@
in.f = x;
integer = (in.i>>23)-127;
in.i -= integer<<23;
- frac = in.f - 1.5;
- /* -0.41446 0.96093 -0.33981 0.15600 */
- frac = -0.41446 + frac*(0.96093 + frac*(-0.33981 + frac*0.15600));
+ frac = in.f - 1.5f;
+ frac = -0.41445418f + frac*(0.95909232f
+ + frac*(-0.33951290f + frac*0.16541097f));
return 1+integer+frac;
}
@@ -150,7 +150,8 @@
return 0;
frac = x-integer;
/* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
- res.f = 1.f + frac * (0.696147f + frac * (0.224411f + 0.079442f*frac));
+ res.f = 0.99992522f + frac * (0.69583354f
+ + frac * (0.22606716f + 0.078024523f*frac));
res.i = (res.i + (integer<<23)) & 0x7fffffff;
return res.f;
}
@@ -199,22 +200,23 @@
/* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
n = x-32768;
/* Get a rough initial guess for the root.
- The optimal minimax quadratic approximation is
- r = 1.4288615575712422-n*(0.8452316405039975+n*0.4519141640876117).
+ The optimal minimax quadratic approximation (using relative error) is
+ r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
Coefficients here, and the final result r, are Q14.*/
- r = ADD16(23410, MULT16_16_Q15(n, ADD16(-13848, MULT16_16_Q15(n, 7405))));
+ r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
/* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
We can compute the result from n and r using Q15 multiplies with some
adjustment, carefully done to avoid overflow.
- Range of y is [-2014,2362]. */
+ Range of y is [-1564,1594]. */
r2 = MULT16_16_Q15(r, r);
y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
- /* Apply a 2nd-order Householder iteration: r' = r*(1+y*(-0.5+y*0.375)). */
+ /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). */
rt = ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
- ADD16(-16384, MULT16_16_Q15(y, 12288)))));
+ SUB16(MULT16_16_Q15(y, 12288), 16384))));
/* rt is now the Q14 reciprocal square root of the Q16 x, with a maximum
- error of 2.70970/16384 and a MSE of 0.587003/16384^2. */
- /* Most of the error in this approximation comes from the following shift: */
+ relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
+ peak absolute error of 2.26591/16384. */
+ /* Most of the error in this function comes from the following shift: */
rt = PSHR32(rt,k);
return rt;
}
@@ -225,7 +227,7 @@
int k;
celt_word16 n;
celt_word32 rt;
- const celt_word16 C[5] = {23174, 11584, -3011, 1570, -557};
+ const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664};
if (x==0)
return 0;
k = (celt_ilog2(x)>>1)-7;
@@ -244,7 +246,7 @@
int k;
celt_word16 n;
celt_word32 rt;
- const celt_word16 C[5] = {23174, 11584, -3011, 1570, -557};
+ const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664};
k = (celt_ilog2(x)>>1)-7;
x = VSHR32(x, (k<<1));
n = x-32768;
@@ -301,12 +303,14 @@
int i;
celt_word16 n, frac;
/*-0.41446 0.96093 -0.33981 0.15600 */
- const celt_word16 C[4] = {-6791, 7872, -1392, 319};
+ /* -0.4144541824871411+32/16384, 0.9590923197873218, -0.3395129038105771,
+ 0.16541096501128538 */
+ const celt_word16 C[4] = {-6758, 15715, -5563, 2708};
if (x==0)
return -32767;
i = celt_ilog2(x);
n = VSHR32(x,i-15)-32768-16384;
- frac = ADD16(C[0], MULT16_16_Q14(n, ADD16(C[1], MULT16_16_Q14(n, ADD16(C[2], MULT16_16_Q14(n, (C[3])))))));
+ frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], MULT16_16_Q15(n, (C[3])))))));
return SHL16(i-13,8)+SHR16(frac,14-8);
}
@@ -316,10 +320,10 @@
K2 = 3-4*log(2)
K3 = 3*log(2) - 2
*/
-#define D0 16384
-#define D1 11356
-#define D2 3726
-#define D3 1301
+#define D0 16383
+#define D1 22804
+#define D2 14819
+#define D3 10204
/** Base-2 exponential approximation (2^x). (Q11 input, Q16 output) */
static inline celt_word32 celt_exp2(celt_word16 x)
{
@@ -331,7 +335,7 @@
else if (integer < -15)
return 0;
frac = SHL16(x-SHL16(integer,11),3);
- frac = ADD16(D0, MULT16_16_Q14(frac, ADD16(D1, MULT16_16_Q14(frac, ADD16(D2 , MULT16_16_Q14(D3,frac))))));
+ frac = ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
return VSHR32(EXTEND32(frac), -integer-2);
}
@@ -339,14 +343,29 @@
static inline celt_word32 celt_rcp(celt_word32 x)
{
int i;
- celt_word16 n, frac;
- const celt_word16 C[5] = {21848, -7251, 2403, -934, 327};
+ celt_word16 n;
+ celt_word16 r;
celt_assert2(x>0, "celt_rcp() only defined for positive values");
i = celt_ilog2(x);
- n = VSHR32(x,i-16)-SHL32(EXTEND32(3),15);
- frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
- MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
- return VSHR32(EXTEND32(frac),i-16);
+ /* n is Q15 with range [0,1). */
+ n = VSHR32(x,i-15)-32768;
+ /* Start with a linear approximation:
+ r = 1.8823529411764706-0.9411764705882353*n.
+ The coefficients and the result are Q14 in the range [15420,30840].*/
+ r = ADD16(30840, MULT16_16_Q15(-15420, n));
+ /* Perform two Newton iterations:
+ r -= r*((r*n)-1.Q15)
+ = r*((r*n)+(r-1.Q15)). */
+ r = SUB16(r, MULT16_16_Q15(r,
+ ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
+ /* We subtract an extra 1 in the second iteration to avoid overflow; it also
+ neatly compensates for truncation error in the rest of the process. */
+ r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
+ ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
+ /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
+ of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
+ error of 1.24665/32768. */
+ return VSHR32(EXTEND32(r),i-16);
}
#define celt_div(a,b) MULT32_32_Q31((celt_word32)(a),celt_rcp(b))
--- a/tests/mathops-test.c
+++ b/tests/mathops-test.c
@@ -30,7 +30,7 @@
#else
prod = val*i;
#endif
- if (fabs(prod-1) > .001)
+ if (fabs(prod-1) > .00025)
{
fprintf (stderr, "div failed: 1/%d="WORD" (product = %f)\n", i, val, prod);
ret = 1;
@@ -47,7 +47,7 @@
celt_word16 val;
val = celt_sqrt(i);
ratio = val/sqrt(i);
- if (fabs(ratio - 1) > .001 && fabs(val-sqrt(i)) > 2)
+ if (fabs(ratio - 1) > .0005 && fabs(val-sqrt(i)) > 2)
{
fprintf (stderr, "sqrt failed: sqrt(%d)="WORD" (ratio = %f)\n", i, val, ratio);
ret = 1;
@@ -81,7 +81,7 @@
for (x=0.001;x<1677700.0;x+=(x/8.0))
{
float error = fabs((1.442695040888963387*log(x))-celt_log2(x));
- if (error>0.001)
+ if (error>0.0009)
{
fprintf (stderr, "celt_log2 failed: fabs((1.442695040888963387*log(x))-celt_log2(x))>0.001 (x = %f, error = %f)\n", x,error);
ret = 1;
@@ -95,7 +95,7 @@
for (x=-11.0;x<24.0;x+=0.0007)
{
float error = fabs(x-(1.442695040888963387*log(celt_exp2(x))));
- if (error>0.0005)
+ if (error>0.0002)
{
fprintf (stderr, "celt_exp2 failed: fabs(x-(1.442695040888963387*log(celt_exp2(x))))>0.0005 (x = %f, error = %f)\n", x,error);
ret = 1;
@@ -117,6 +117,49 @@
}
}
#else
+void testlog2(void)
+{
+ celt_word32 x;
+ for (x=8;x<1073741824;x+=(x>>3))
+ {
+ float error = fabs((1.442695040888963387*log(x/16384.0))-celt_log2(x)/256.0);
+ if (error>0.003)
+ {
+ fprintf (stderr, "celt_log2 failed: x = %ld, error = %f\n", (long)x,error);
+ ret = 1;
+ }
+ }
+}
+
+void testexp2(void)
+{
+ celt_word16 x;
+ for (x=-32768;x<-30720;x++)
+ {
+ float error1 = fabs(x/2048.0-(1.442695040888963387*log(celt_exp2(x)/65536.0)));
+ float error2 = fabs(exp(0.6931471805599453094*x/2048.0)-celt_exp2(x)/65536.0);
+ if (error1>0.0002&&error2>0.00004)
+ {
+ fprintf (stderr, "celt_exp2 failed: x = "WORD", error1 = %f, error2 = %f\n", x,error1,error2);
+ ret = 1;
+ }
+ }
+}
+
+void testexp2log2(void)
+{
+ celt_word32 x;
+ for (x=8;x<65536;x+=(x>>3))
+ {
+ float error = fabs(x-0.25*celt_exp2(celt_log2(x)<<3))/16384;
+ if (error>0.004)
+ {
+ fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_log2(celt_exp2(x))))>0.001 (x = %ld, error = %f)\n", (long)x,error);
+ ret = 1;
+ }
+ }
+}
+
void testilog2(void)
{
celt_word32 x;
@@ -137,11 +180,10 @@
testdiv();
testsqrt();
testrsqrt();
-#ifndef FIXED_POINT
testlog2();
testexp2();
testexp2log2();
-#else
+#ifdef FIXED_POINT
testilog2();
#endif
return ret;