shithub: mc

--- a/lib/math/exp-impl.myr

+++ b/lib/math/exp-impl.myr

@@ -1,11 +1,20 @@

 use std

 use "fpmath"

+use "util"

-/* See [Mul16] (6.2.1), and in particular [Tan89] (which this notation follows) */

+/*

+    See [Mul16] (6.2.1), [Tan89], and [Tan92]. While the exp and

+    expm1 functions are similar enough to be in the same file (Tang's

+    algorithms use the same S constants), they are not quite similar

+    enough to be in the same function.

+ */

 pkg math =

 	pkglocal const exp32 : (f : flt32 -> flt32)

 	pkglocal const exp64 : (f : flt64 -> flt64)

+	pkglocal const expm132 : (f : flt32 -> flt32)

+	pkglocal const expm164 : (f : flt64 -> flt64)

;;

 extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)

@@ -22,37 +31,63 @@

 	tobits : (f : @f -> @u)

 	frombits : (u : @u -> @f)

 	inf : @u

+	nan : @u

+	/* For exp */

 	thresh_1_min : @u

 	thresh_1_max : @u

 	thresh_2 : @u

+	Ai : @u[:]

+	/* For expm1 */

+	thresh_tiny : @u

+	thresh_huge_neg : @u

+	Log3_4 : @u

+	Log5_4 : @u

+	Bi : @u[:]

+	exactness_thresh : @u

+	/* For both */

+	nabs : @u

 	inv_L : @u

 	L1 : @u

 	L2 : @u

-	nabs : @u

-	Ai : @u[:]

 	S : (@u, @u)[32]

;;

-/* Coefficients for p(t), locally approximating exp1m */

-const Ai32 : uint32[:] = [

-		0x00000000,

-		0x00000000,

+/* Constants and helpers used by exp32 and expm132 (single precision) */

+const desc32 : fltdesc(flt32, uint32, int32) = [

+	.explode = std.flt32explode,

+	.assem = std.flt32assem,

+	.fma = fma32,

+	.horner = horner_polyu32,

+	.sgnmask = (1 << 31),

+	.tobits = std.flt32bits,

+	.frombits = std.flt32frombits,

+	.inf = 0x7f800000,

+	.nan = 0x7fc00000, /* quiet NaN: all eight hex digits are required */

+	.thresh_1_min = 0xc2cff1b4, /* ln(2^(-127 - 23)) ~= -103.9720770839917 */

+	.thresh_1_max = 0x42b17218, /* ln([2 - 2^(-24)] * 2^127) ~= 88.72283905206 */

+	.inv_L = 0x4238aa3b, /* L = log(2) / 32, this is 1/L in working precision */

+	.L1 = 0x3cb17200, /* L1 and L2 sum to give L in high precision, */

+	.L2 = 0x333fbe8e, /* and L1 has some trailing zeroes. */

+	.nabs = 9, /* L1 has 9 trailing zeroes in binary */

+	.Ai = [ /* Coefficients for approximating expm1 in a tiny range */

 		0x3f000044,

 		0x3e2aaaec,

-	][:]

-const Ai64 : uint64[:] = [

-		0x0000000000000000,

-		0x0000000000000000,

-		0x3fe0000000000000,

-		0x3fc5555555548f7c,

-		0x3fa5555555545d4e,

-		0x3f811115b7aa905e,

-		0x3f56c1728d739765,

-	][:]

-/* Double-precise expansions of 2^(J/32) */

-const S32 : (uint32, uint32)[32] = [

+	][:],

+	.Log3_4 = 0xbe934b11, /* log(1 - 1/4) < x < log(1 + 1/4) */

+	.Log5_4 = 0x3e647fbf, /* triggers special expm1 case */

+	.thresh_tiny = 0x33000000, /* similar to thresh_1_{min,max}, but for expm1 */

+	.thresh_huge_neg = 0xc18aa122, /* ~= -17.3287; expm1gen returns -1 at or below this */

+	.Bi = [ /* Coefficients for approximating expm1 between log(3/4) and log(5/4) */

+		0x3e2aaaaa,

+		0x3d2aaaa0,

+		0x3c0889ff,

+		0x3ab64de5,

+		0x394ab327,

+	][:],

+	.S = [ /* Double-precise expansions of 2^(J/32) */

 		(0x3f800000, 0x00000000),

 		(0x3f82cd80, 0x35531585),

 		(0x3f85aac0, 0x34d9f312),

@@ -85,9 +120,49 @@

 		(0x3fefe480, 0x36e66f73),

 		(0x3ff52540, 0x36f45492),

 		(0x3ffa8380, 0x36cb6dc9),

-	]

+	],

+	.exactness_thresh = 24, /* threshold to prevent underflow in a S[high] + 2^-m */

+]

-const S64 : (uint64, uint64)[32] = [

+const desc64 : fltdesc(flt64, uint64, int64) = [

+	.explode = std.flt64explode,

+	.assem = std.flt64assem,

+	.fma = fma64,

+	.horner = horner_polyu64,

+	.sgnmask = (1 << 63),

+	.tobits = std.flt64bits,

+	.frombits = std.flt64frombits,

+	.inf = 0x7ff0000000000000,

+	.nan = 0x7ff8000000000000,

+	.thresh_1_min = 0xc0874910d52d3052, /* ln(2^(-1023 - 52)) ~= -745.1332191019 */

+	.thresh_1_max = 0x40862e42fefa39ef, /* ln([2 - 2^(-53)] * 2^1023) ~= 709.78271289 */

+	.inv_L = 0x40471547652b82fe, /* below as in exp32 */

+	.L1 = 0x3f962e42fef00000,

+	.L2 = 0x3d8473de6af278ed,

+	.nabs = 20,

+	.Ai = [

+		0x3fe0000000000000,

+		0x3fc5555555548f7c,

+		0x3fa5555555545d4e,

+		0x3f811115b7aa905e,

+		0x3f56c1728d739765,

+	][:],

+	.Log3_4 = 0xbfd269621134db93,

+	.Log5_4 = 0x3fcc8ff7c79a9a22,

+	.thresh_tiny = 0x3c90000000000000,

+	.thresh_huge_neg = 0xc042b708872320e1,

+	.Bi = [

+		0x3fc5555555555549,

+		0x3fa55555555554b6,

+		0x3f8111111111a9f3,

+		0x3f56c16c16ce14c6,

+		0x3f2a01a01159dd2d,

+		0x3efa019f635825c4,

+		0x3ec71e14bfe3db59,

+		0x3e928295484734ea,

+		0x3e5a2836aa646b96,

+	][:],

+	.S = [

 		(0x3ff0000000000000, 0x0000000000000000),

 		(0x3ff059b0d3158540, 0x3d0a1d73e2a475b4),

 		(0x3ff0b5586cf98900, 0x3ceec5317256e308),

@@ -120,52 +195,16 @@

 		(0x3ffdfc97337b9b40, 0x3cfeb968cac39ed3),

 		(0x3ffea4afa2a490c0, 0x3cf9858f73a18f5e),

 		(0x3fff50765b6e4540, 0x3c99d3e12dd8a18b),

-	]

+	],

+	.exactness_thresh = 53,

+]

 const exp32 = {f : flt32

-	const d : fltdesc(flt32, uint32, int32) = [

-		.explode = std.flt32explode,

-		.assem = std.flt32assem,

-		.fma = fma32,

-		.horner = horner_polyu32,

-		.sgnmask = (1 << 31),

-		.tobits = std.flt32bits,

-		.frombits = std.flt32frombits,

-		.inf = 0x7f800000,

-		.thresh_1_min = 0xc2cff1b4, /* ln(2^(-127 - 23)) ~= -103.9720770839917 */

-		.thresh_1_max = 0x42b17218, /* ln([2 - 2^(-24)] * 2^127) ~= 88.72283905206 */

-		.inv_L = 0x4238aa3b, /* L = log(2) / 32, this is 1/L in working precision */

-		.L1 = 0x3cb17200, /* L1 and L2 sum to give L in high precision, and L1 */

-		.L2 = 0x333fbe8e, /* has some trailing zeroes. */

-		.nabs = 9, /* L1 has 9 trailing zeroes in binary */

-		.Ai = Ai32,

-		.S = S32,

-	]

-	-> expgen(f, d)

+	-> expgen(f, desc32)

 const exp64 = {f : flt64

-	const d : fltdesc(flt64, uint64, int64) = [

-		.explode = std.flt64explode,

-		.assem = std.flt64assem,

-		.fma = fma64,

-		.horner = horner_polyu64,

-		.sgnmask = (1 << 63),

-		.tobits = std.flt64bits,

-		.frombits = std.flt64frombits,

-		.inf = 0x7ff0000000000000,

-		.thresh_1_min = 0xc0874910d52d3052, /* ln(2^(-1023 - 52)) ~= -745.1332191019 */

-		.thresh_1_max = 0x40862e42fefa39ef, /* ln([2 - 2^(-53)] * 2^1023) ~= 709.78271289 */

-		.inv_L = 0x40471547652b82fe, /* as in exp32 */

-		.L1 = 0x3f962e42fef00000,

-		.L2 = 0x3d8473de6af278ed,

-		.nabs = 20,

-		.Ai = Ai64,

-		.S = S64,

-	]

-	-> expgen(f, d)

+	-> expgen(f, desc64)

 generic expgen = {f : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i, roundable @f -> @i

@@ -180,7 +219,7 @@

*/

 	if !n && b >= d.thresh_1_max

 		-> d.frombits(d.inf)

-	elif n && b >= d.thresh_1_min

+	elif n && b > d.thresh_1_min

 		-> (0.0 : @f)

;;

@@ -191,9 +230,8 @@

 	/* Argument reduction to [ -ln(2)/64, ln(2)/64 ] */

 	var inv_L = d.frombits(d.inv_L)

-	var finv_L = f * inv_L

-	var N = rn(finv_L)

+	var N = rn(f * inv_L)

 	var N2  = N % (32 : @i)

 	if N2 < 0

 		N2 += (32 : @i)

@@ -220,7 +258,7 @@

 	/* Compute a polynomial approximation of exp1m */

 	var R = R1 + R2

-	var Q = d.horner(R, d.Ai)

+	var Q = R * R * d.horner(R, d.Ai)

 	var P = R1 + (R2 + Q)

 	/* Expand out from reduced range */

@@ -234,6 +272,127 @@

 	var nu, eu, su

 	(nu, eu, su) = d.explode(unscaled)

 	var exp = d.assem(nu, eu + M, su)

+	if (d.tobits(exp) == 0)

+		/* Make sure we don't quite return 0 */

+		-> d.frombits(1)

+	;;

 	-> exp

+}

+/* expm1 for flt32: runs the generic expm1gen with the single-precision descriptor */

+const expm132 = {f : flt32

+	-> expm1gen(f, desc32)

+}

+/* expm1 for flt64: runs the generic expm1gen with the double-precision descriptor */

+const expm164 = {f : flt64

+	-> expm1gen(f, desc64)

+}

+/*

+   Shared implementation of expm1 for both precisions, after [Tan92].

+   f is the argument; d supplies the per-type constants and helpers.

+   Special inputs are handled first. Then "Procedure 2" (a polynomial

+   in f itself) is used when f is strictly between log(3/4) and

+   log(5/4), and "Procedure 1" (argument reduction plus the S table)

+   is used for everything else.

+ */

+generic expm1gen = {f : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i, roundable @f -> @i

+	var b = d.tobits(f)

+	var n, e, s

+	(n, e, s) = d.explode(f)

+	/* Special cases: +/- 0, inf, NaN, tiny, and huge */

+	if (b & ~d.sgnmask == 0)

+		/* +/- 0 is returned unchanged */

+		-> f

+	elif n && (b & ~d.sgnmask == d.inf)

+		/* -inf gives -1 */

+		-> (-1.0 : @f)

+	elif (b & ~d.sgnmask == d.inf)

+		/* +inf is returned unchanged */

+		-> f

+	elif std.isnan(f)

+		/* any NaN input gives the descriptor's NaN bit pattern */

+		-> d.frombits(d.nan)

+	elif (b & ~d.sgnmask) <= d.thresh_tiny

+		/*

+		   |f| <= thresh_tiny. The two scale factors are presumably

+		   2^100 and 2^-100 (assem with a zero significand); confirm

+		   against std.flt{32,64}assem.

+		 */

+		var two_to_large = d.assem(false, 100, 0)

+		var two_to_small = d.assem(false, -100, 0)

+		var abs_f = d.assem(false, e, s)

+		-> (two_to_large * f + abs_f) * two_to_small

+	elif !n && b >= d.thresh_1_max /* exp(x) = oo <=> expm1(x) = oo, as it turns out */

+		-> d.frombits(d.inf)

+	elif n && b >= d.thresh_huge_neg

+		/* negative f at or beyond thresh_huge_neg in magnitude gives -1 */

+		-> (-1.0 : @f)

+	;;

+	if ((n && b < d.Log3_4) || (!n && b < d.Log5_4))

+		/* Procedure 2 */

+		/* compute x^2  / 2 with extra precision */

+		/* f is split as u + v, where u comes from the file's round() helper */

+		var u = round(f, d)

+		var v = f - u

+		var y = u * u * (0.5 : @f)

+		var z = v * (f + u) * (0.5 : @f)

+		var q = f * f * f * d.horner(f, d.Bi)

+		var yn, ye, ys

+		(yn, ye, ys) = d.explode(y)

+		/* the order of summation depends on the exponent of y; only ye is used */

+		if (ye >= -7)

+			-> (u + y) + (q + (v  + z))

+		else

+			-> f + (y + (q + z))

+		;;

+	;;

+	/* Procedure 1 */

+	var inv_L = d.frombits(d.inv_L)

+	var N = rn(f * inv_L)

+	/* split N as N1 + N2, with N2 brought into [0, 32) */

+	var N2 = N % (32 : @i)

+	if N2 < 0

+		N2 += (32 : @i)

+	;;

+	var N1 = N - N2

+	var R1 : @f, R2 : @f

+	/*

+	   As in the exp case, R1 + R2 very well approximates f

+	   reduced into [ -ln(2)/64, ln(2)/64 ]

+	 */

+	if std.abs(N) >= (1 << d.nabs)

+		R1 = (f - (N1 : @f) * d.frombits(d.L1)) - ((N2 : @f) * d.frombits(d.L1))

+	else

+		R1 = f - (N : @f) * d.frombits(d.L1)

+	;;

+	R2 = -1.0 * (N : @f) * d.frombits(d.L2)

+	/* M is the power-of-two scale, J indexes the S table */

+	var M = (N1 : @i) / 32

+	var J = (N2 : std.size)

+	/* Compute a polynomial approximation of expm1 */

+	var R = R1 + R2

+	var Q = R * R * d.horner(R, d.Ai)

+	var P = R1 + (R2 + Q)

+	/* Expand out */

+	var Su_hi, Su_lo

+	(Su_hi, Su_lo) = d.S[J]

+	var S_hi = d.frombits(Su_hi)

+	var S_lo = d.frombits(Su_lo)

+	var S = S_hi + S_lo

+	/*

+	   Note that [Tan92] includes cases to avoid tripping the

+	   underflow flag when not technically required. We currently

+	   do not care about such flags, so that logic is omitted.

+	 */

+	var two_m = d.assem(false, M, 0)

+	var two_neg_m = d.assem(false, -M, 0)

+	/* three reconstruction formulas, selected by the size of M */

+	if M >= d.exactness_thresh

+		-> two_m * (S_hi  + (S * P + (S_lo - two_neg_m)))

+	elif M <= -8

+		-> two_m * (S_hi  + (S * P + S_lo)) - (1.0 : @f)

+	;;

+	-> two_m * ((S_hi - two_neg_m) + (S_hi * P + S_lo * (P + (1.0 : @f))))

+}

+generic round = {f : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i, roundable @f -> @i

+	var b = d.tobits(f)

+	var n, e, s

+	(n, e, s) = d.explode(f)

+	var mask = ~((1 << d.nabs) - 1)

+	if need_round_away(0, ((s & mask) : uint64), (d.nabs : int64) + 1)

+		-> d.assem(n, e, 1 + s & mask)

+	;;

+	-> d.assem(n, e, s & mask)

--- a/lib/math/fpmath.myr

+++ b/lib/math/fpmath.myr

@@ -5,6 +5,7 @@

 		/* exp-impl */

 		exp : (f : @f -> @f)

+		expm1 : (f : @f -> @f)

 		/* fma-impl */

 		fma : (x : @f, y : @f, z : @f -> @f)

@@ -67,6 +68,7 @@

 	fma = {x, y, z; -> fma32(x, y, z)}

 	exp = {f; -> exp32(f)}

+	expm1 = {f; -> expm132(f)}

 	horner_poly = {f, a; -> horner_poly32(f, a)}

 	horner_polyu = {f, a; -> horner_polyu32(f, a)}

@@ -86,6 +88,7 @@

 	fma = {x, y, z; -> fma64(x, y, z)}

 	exp = {f; -> exp64(f)}

+	expm1 = {f; -> expm164(f)}

 	horner_poly = {f, a; -> horner_poly64(f, a)}

 	horner_polyu = {f, a; -> horner_polyu64(f, a)}

@@ -105,6 +108,9 @@

 extern const exp32 : (x : flt32 -> flt32)

 extern const exp64 : (x : flt64 -> flt64)

+extern const expm132 : (x : flt32 -> flt32)

+extern const expm164 : (x : flt64 -> flt64)

 extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)

 extern const fma64 : (x : flt64, y : flt64, z : flt64 -> flt64)

--- a/lib/math/references

+++ b/lib/math/references

@@ -19,3 +19,9 @@

 Function in IEEE Floating-point Arithmetic”. In: ACM Trans. Math.

 Softw. 15.2 (June 1989), pp. 144–157. issn: 0098-3500.  doi:

 10.1145/63522.214389. url: http://doi.acm.org/10.1145/63522.214389.

+[Tan92]

+Ping Tak Peter Tang. “Table-driven Implementation of the Expm1

+Function in IEEE Floating-point Arithmetic”. In: ACM Trans. Math.

+Softw. 18.2 (June 1992), pp. 211–222. issn: 0098-3500.  doi:

+10.1145/146847.146928. url: http://doi.acm.org/10.1145/146847.146928.

--- a/lib/math/test/exp-impl.myr

+++ b/lib/math/test/exp-impl.myr

@@ -8,6 +8,7 @@

 		[.name="exp-02", .fn = exp02],

 		[.name="exp-03", .fn = exp03],

 		[.name="exp-04", .fn = exp04],

+		[.name="expm1-01", .fn = expm101],

 	][:])

@@ -19,6 +20,15 @@

 		(0x42000000, 0x568fa1fe),

 		(0xc2b00000, 0x0041edc4),

 		(0xc2b20000, 0x001840fc),

+		(0x7f7fffff, 0x7f800000),

+		(0x7f800000, 0x7f800000),

+		(0x7f800001, 0x7fc00000),

+		(0xc2cff1b3, 0x00000001),

+		(0xc2cff1b4, 0x00000001),

+		(0xc2cff1b5, 0000000000),

+		(0x42b17217, 0x7f7fff84),

+		(0x42b17218, 0x7f800000),

+		(0x42b17219, 0x7f800000),

 	][:]

 	for (x, y) : inputs

@@ -99,3 +109,28 @@

;;

;;

+/*

+   Spot checks for the 32-bit expm1. Each pair is (input bits,

+   expected result bits).

+ */

+const expm101 = {c

+	var inputs : (uint32, uint32)[:] = [

+		(0x00000000, 0x00000000),

+		(0x3f000000, 0x3f261299),

+		(0x34000000, 0x34000000),

+		(0x3c000000, 0x3c008056),

+		(0x42000000, 0x568fa1fe),

+		(0xc2b00000, 0xbf800000),

+		(0xc2b20000, 0xbf800000),

+		(0x01000000, 0x01000000),

+		(0x40000000, 0x40cc7326),

+		(0x42b17200, 0x7f7d7740),

+	][:]

+	for (x, y) : inputs

+		var xf : flt32 = std.flt32frombits(x)

+		var rf = math.expm1(xf)

+		/* compare bit patterns, which is what the failure message reports */

+		var rb = std.flt32bits(rf)

+		testr.check(c, rb == y,

+			"expm1(0x{b=16,w=8,p=0}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",

+			x, y, rb)

+	;;

+}