shithub: mc

Download patch

ref: 4aa0e6c6b8a1a23b144b0382d6156aaa55b2c7fd
parent: ce931f082769f332e545cc477bc85c2bbca71ac2
author: S. Gilles <[email protected]>
date: Thu May 24 05:02:45 EDT 2018

Implement powr.

This is an attempt to extend the log algorithm of [Tan90] to compute
x^y. I don't intend to keep this algorithm for long, since I didn't
succeed very well. It's tremendously slow (worse even than mpfr!),
and has some truly terrible edge cases.

For example, computing pow(0x3f7ff7f3, 0xc7b58adf) (that is,
(0.99987715)^(-92949.74)) returns infinity, when it should be
0x47b1d362 (91046.765), which is a dozen orders of magnitude below
the infinity border.

In more pedestrian cases, errors of up to ~16 ulps can be observed.
For example, powr(0x3f80a83e, 0xc65492ba) = (1.0051343)^(-13604.682)
should return 0x0d3304a3, but it gives 0x0d3304b4.

--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -18,6 +18,9 @@
 	# polynomial evaluation methods
 	poly-impl.myr
 
+	# x^y
+	powr-impl.myr
+
 	# scalb (multiply x by 2^m)
 	scale2-impl.myr
 
--- a/lib/math/fpmath.myr
+++ b/lib/math/fpmath.myr
@@ -18,6 +18,9 @@
 		horner_poly : (x : @f, a : @f[:] -> @f)
 		horner_polyu : (x : @f, a : @u[:] -> @f)
 
+		/* powr-impl */
+		powr : (x : @f, y : @f -> @f)
+
 		/* scale2-impl */
 		scale2 : (x : @f, m : @i -> @f)
 
@@ -83,6 +86,8 @@
 	horner_poly = {x, a; -> horner_poly32(x, a)}
 	horner_polyu = {x, a; -> horner_polyu32(x, a)}
 
+	powr = {x, y; -> powr32(x, y)}
+
 	scale2 = {x, m; -> scale232(x, m)}
 
 	sqrt = {x; -> sqrt32(x)}
@@ -108,6 +113,8 @@
 	horner_poly = {x, a; -> horner_poly64(x, a)}
 	horner_polyu = {x, a; -> horner_polyu64(x, a)}
 
+	powr = {x, y; -> powr64(x, y)}
+
 	scale2 = {x, m; -> scale264(x, m)}
 
 	sqrt = {x; -> sqrt64(x)}
@@ -143,6 +150,9 @@
 
 extern const horner_polyu32 : (x : flt32, a : uint32[:] -> flt32)
 extern const horner_polyu64 : (x : flt64, a : uint64[:] -> flt64)
+
+extern const powr32 : (x : flt32, y : flt32 -> flt32)
+extern const powr64 : (x : flt64, y : flt64 -> flt64)
 
 extern const scale232 : (x : flt32, m : int32 -> flt32)
 extern const scale264 : (x : flt64, m : int64 -> flt64)
--- a/lib/math/log-impl.myr
+++ b/lib/math/log-impl.myr
@@ -12,6 +12,10 @@
 
 	pkglocal const log1p32 : (x : flt32 -> flt32)
 	pkglocal const log1p64 : (x : flt64 -> flt64)
+
+	/* Constants from [Tan90], note that [128] contains accurate log(2) */
+	pkglocal const accurate_logs32 : (uint32, uint32)[129]
+	pkglocal const accurate_logs64 : (uint64, uint64)[129]
 ;;
 
 extern const horner_polyu32 : (f : flt32, a : uint32[:] -> flt32)
@@ -43,7 +47,7 @@
 	T3exp : @u
 
 	/* For procedure 1 */
-	C : (@u, @u)[129]
+	C : (@u, @u)[:]
 	Ai : @u[:]
 
 	/* For procedure 2 */
@@ -51,6 +55,271 @@
 	Mtruncmask : @u
 ;;
 
+/* Accurate representations for log(1 + j/2^7), all j */
+const accurate_logs32 = [
+	(0000000000, 0000000000),
+	(0x3bff0000, 0x3429ac42),
+	(0x3c7e0000, 0x35a8b0fc),
+	(0x3cbdc000, 0x368d83eb),
+	(0x3cfc2000, 0xb6b278c4),
+	(0x3d1cf000, 0x3687b9ff),
+	(0x3d3ba000, 0x3631ec66),
+	(0x3d5a1000, 0x36dd7119),
+	(0x3d785000, 0x35c30046),
+	(0x3d8b2800, 0x365bba8e),
+	(0x3d9a1000, 0xb621a791),
+	(0x3da8d800, 0x34e7e0c3),
+	(0x3db78800, 0xb635d46a),
+	(0x3dc61800, 0x368bac63),
+	(0x3dd49000, 0x36da7496),
+	(0x3de2f000, 0x36a91eb8),
+	(0x3df13800, 0x34edc55e),
+	(0x3dff6800, 0xb6dd9c48),
+	(0x3e06bc00, 0xb44197b9),
+	(0x3e0db800, 0x36ab54be),
+	(0x3e14ac00, 0xb6b41f80),
+	(0x3e1b9000, 0xb4f7f85c),
+	(0x3e226800, 0x36adb32e),
+	(0x3e293800, 0xb650e2f2),
+	(0x3e2ff800, 0x36c1c29e),
+	(0x3e36b000, 0x35fe719d),
+	(0x3e3d5c00, 0x3590210e),
+	(0x3e43fc00, 0x36819483),
+	(0x3e4a9400, 0xb6958c2f),
+	(0x3e511c00, 0x36f07f8b),
+	(0x3e57a000, 0xb6dac5fd),
+	(0x3e5e1400, 0x354e85b2),
+	(0x3e648000, 0xb5838656),
+	(0x3e6ae000, 0x3685ad3f),
+	(0x3e713800, 0x356dc55e),
+	(0x3e778400, 0x36b72f71),
+	(0x3e7dc800, 0x36436af2),
+	(0x3e820200, 0xb6d35a59),
+	(0x3e851a00, 0xb6d8ec63),
+	(0x3e882c00, 0x363f9ae5),
+	(0x3e8b3a00, 0x36e55d5d),
+	(0x3e8e4400, 0x36c60b4d),
+	(0x3e914a00, 0x34fde7bd),
+	(0x3e944a00, 0x36d09ef4),
+	(0x3e974800, 0xb6ea28f7),
+	(0x3e9a3e00, 0x36ecd4c4),
+	(0x3e9d3200, 0x36455694),
+	(0x3ea02200, 0xb6779796),
+	(0x3ea30c00, 0x363c21c6),
+	(0x3ea5f200, 0x36fcabbc),
+	(0x3ea8d600, 0xb693c690),
+	(0x3eabb400, 0xb60e8baa),
+	(0x3eae8e00, 0xb51029fe),
+	(0x3eb16400, 0x353cae72),
+	(0x3eb43600, 0x3601e9b1),
+	(0x3eb70400, 0x366aa2ba),
+	(0x3eb9ce00, 0x36bfb5df),
+	(0x3ebc9600, 0xb6d50116),
+	(0x3ebf5800, 0xb5f88faa),
+	(0x3ec21600, 0x368ed0f4),
+	(0x3ec4d200, 0xb64793ec),
+	(0x3ec78800, 0x36f439b3),
+	(0x3eca3c00, 0x36a0e109),
+	(0x3eccec00, 0x36ac08bf),
+	(0x3ecf9a00, 0xb6e09a03),
+	(0x3ed24200, 0x3410e5bb),
+	(0x3ed4e800, 0xb69b2b30),
+	(0x3ed78a00, 0xb6b66dc4),
+	(0x3eda2800, 0xb6084337),
+	(0x3edcc200, 0x36c4b499),
+	(0x3edf5a00, 0x3659da72),
+	(0x3ee1ee00, 0x36bd3e6d),
+	(0x3ee48000, 0xb6038656),
+	(0x3ee70e00, 0xb687a3d0),
+	(0x3ee99800, 0xb4c0ff8a),
+	(0x3eec2000, 0xb6c6d3af),
+	(0x3eeea400, 0xb6afd9f2),
+	(0x3ef12400, 0x3601a7c7),
+	(0x3ef3a200, 0x351875a2),
+	(0x3ef61c00, 0x36ce9234),
+	(0x3ef89400, 0x3675faf0),
+	(0x3efb0a00, 0xb6e02c7f),
+	(0x3efd7a00, 0x36c47bc8),
+	(0x3effea00, 0xb68fbd40),
+	(0x3f012b00, 0xb6d5a5a3),
+	(0x3f025f00, 0xb444adb2),
+	(0x3f039200, 0xb551f190),
+	(0x3f04c300, 0x36f4f573),
+	(0x3f05f400, 0xb6d1bdad),
+	(0x3f072200, 0x36985d1d),
+	(0x3f085000, 0xb6c61d2b),
+	(0x3f097c00, 0xb6e6a6c1),
+	(0x3f0aa600, 0x35f4bd35),
+	(0x3f0bcf00, 0x36abbd8a),
+	(0x3f0cf700, 0x36568cf9),
+	(0x3f0e1e00, 0xb67c11d8),
+	(0x3f0f4300, 0xb4a18fbf),
+	(0x3f106700, 0xb5cb9b55),
+	(0x3f118a00, 0xb6f28414),
+	(0x3f12ab00, 0xb6062ce1),
+	(0x3f13cb00, 0xb576bb27),
+	(0x3f14ea00, 0xb68013d5),
+	(0x3f160700, 0x369ed449),
+	(0x3f172400, 0xb6bc91c0),
+	(0x3f183f00, 0xb68ccb0f),
+	(0x3f195900, 0xb6cc6ede),
+	(0x3f1a7100, 0x3689d9ce),
+	(0x3f1b8900, 0xb684ab8c),
+	(0x3f1c9f00, 0x34d3562a),
+	(0x3f1db400, 0x36094000),
+	(0x3f1ec800, 0x359a9c56),
+	(0x3f1fdb00, 0xb60f65d2),
+	(0x3f20ec00, 0x36fe8467),
+	(0x3f21fd00, 0xb368318d),
+	(0x3f230c00, 0x36bc21c6),
+	(0x3f241b00, 0xb6c2e157),
+	(0x3f252800, 0xb67449f8),
+	(0x3f263400, 0xb64a0662),
+	(0x3f273f00, 0xb67dc915),
+	(0x3f284900, 0xb6c33fe9),
+	(0x3f295100, 0x36d265bc),
+	(0x3f2a5900, 0x360cf333),
+	(0x3f2b6000, 0xb6454982),
+	(0x3f2c6500, 0x36db5cd8),
+	(0x3f2d6a00, 0x34186b3e),
+	(0x3f2e6e00, 0xb6e2393f),
+	(0x3f2f7000, 0x35aa4906),
+	(0x3f307200, 0xb6d0bb87),
+	(0x3f317200, 0x35bfbe8e), /* Note C[128] is log2 */
+]
+
+const accurate_logs64 = [
+	(000000000000000000, 000000000000000000),
+	(0x3f7fe02a6b200000, 0xbd6f30ee07912df9),
+	(0x3f8fc0a8b1000000, 0xbd5fe0e183092c59),
+	(0x3f97b91b07d80000, 0xbd62772ab6c0559c),
+	(0x3f9f829b0e780000, 0x3d2980267c7e09e4),
+	(0x3fa39e87ba000000, 0xbd642a056fea4dfd),
+	(0x3fa77458f6340000, 0xbd62303b9cb0d5e1),
+	(0x3fab42dd71180000, 0x3d671bec28d14c7e),
+	(0x3faf0a30c0100000, 0x3d662a6617cc9717),
+	(0x3fb16536eea40000, 0xbd60a3e2f3b47d18),
+	(0x3fb341d7961c0000, 0xbd4717b6b33e44f8),
+	(0x3fb51b073f060000, 0x3d383f69278e686a),
+	(0x3fb6f0d28ae60000, 0xbd62968c836cc8c2),
+	(0x3fb8c345d6320000, 0xbd5937c294d2f567),
+	(0x3fba926d3a4a0000, 0x3d6aac6ca17a4554),
+	(0x3fbc5e548f5c0000, 0xbd4c5e7514f4083f),
+	(0x3fbe27076e2a0000, 0x3d6e5cbd3d50fffc),
+	(0x3fbfec9131dc0000, 0xbd354555d1ae6607),
+	(0x3fc0d77e7cd10000, 0xbd6c69a65a23a170),
+	(0x3fc1b72ad52f0000, 0x3d69e80a41811a39),
+	(0x3fc29552f8200000, 0xbd35b967f4471dfc),
+	(0x3fc371fc201f0000, 0xbd6c22f10c9a4ea8),
+	(0x3fc44d2b6ccb0000, 0x3d6f4799f4f6543e),
+	(0x3fc526e5e3a20000, 0xbd62f21746ff8a47),
+	(0x3fc5ff3070a80000, 0xbd6b0b0de3077d7e),
+	(0x3fc6d60fe71a0000, 0xbd56f1b955c4d1da),
+	(0x3fc7ab8902110000, 0xbd537b720e4a694b),
+	(0x3fc87fa065210000, 0xbd5b77b7effb7f41),
+	(0x3fc9525a9cf40000, 0x3d65ad1d904c1d4e),
+	(0x3fca23bc1fe30000, 0xbd62a739b23b93e1),
+	(0x3fcaf3c94e810000, 0xbd600349cc67f9b2),
+	(0x3fcbc286742e0000, 0xbd6cca75818c5dbc),
+	(0x3fcc8ff7c79b0000, 0xbd697794f689f843),
+	(0x3fcd5c216b500000, 0xbd611ba91bbca682),
+	(0x3fce27076e2b0000, 0xbd3a342c2af0003c),
+	(0x3fcef0adcbdc0000, 0x3d664d948637950e),
+	(0x3fcfb9186d5e0000, 0x3d5f1546aaa3361c),
+	(0x3fd0402594b50000, 0xbd67df928ec217a5),
+	(0x3fd0a324e2738000, 0x3d50e35f73f7a018),
+	(0x3fd1058bf9ae8000, 0xbd6a9573b02faa5a),
+	(0x3fd1675cabab8000, 0x3d630701ce63eab9),
+	(0x3fd1c898c1698000, 0x3d59fafbc68e7540),
+	(0x3fd22941fbcf8000, 0xbd3a6976f5eb0963),
+	(0x3fd2895a13de8000, 0x3d3a8d7ad24c13f0),
+	(0x3fd2e8e2bae10000, 0x3d5d309c2cc91a85),
+	(0x3fd347dd9a988000, 0xbd25594dd4c58092),
+	(0x3fd3a64c55698000, 0xbd6d0b1c68651946),
+	(0x3fd4043086868000, 0x3d63f1de86093efa),
+	(0x3fd4618bc21c8000, 0xbd609ec17a426426),
+	(0x3fd4be5f95778000, 0xbd3d7c92cd9ad824),
+	(0x3fd51aad872e0000, 0xbd3f4bd8db0a7cc1),
+	(0x3fd5767717458000, 0xbd62c9d5b2a49af9),
+	(0x3fd5d1bdbf580000, 0x3d4394a11b1c1ee4),
+	(0x3fd62c82f2ba0000, 0xbd6c356848506ead),
+	(0x3fd686c81e9b0000, 0x3d54aec442be1015),
+	(0x3fd6e08eaa2b8000, 0x3d60f1c609c98c6c),
+	(0x3fd739d7f6bc0000, 0xbd67fcb18ed9d603),
+	(0x3fd792a55fdd8000, 0xbd6c2ec1f512dc03),
+	(0x3fd7eaf83b828000, 0x3d67e1b259d2f3da),
+	(0x3fd842d1da1e8000, 0x3d462e927628cbc2),
+	(0x3fd89a3386c18000, 0xbd6ed2a52c73bf78),
+	(0x3fd8f11e87368000, 0xbd5d3881e8962a96),
+	(0x3fd947941c210000, 0x3d56faba4cdd147d),
+	(0x3fd99d9581180000, 0xbd5f753456d113b8),
+	(0x3fd9f323ecbf8000, 0x3d584bf2b68d766f),
+	(0x3fda484090e58000, 0x3d6d8515fe535b87),
+	(0x3fda9cec9a9a0000, 0x3d40931a909fea5e),
+	(0x3fdaf12932478000, 0xbd3e53bb31eed7a9),
+	(0x3fdb44f77bcc8000, 0x3d4ec5197ddb55d3),
+	(0x3fdb985896930000, 0x3d50fb598fb14f89),
+	(0x3fdbeb4d9da70000, 0x3d5b7bf7861d37ac),
+	(0x3fdc3dd7a7cd8000, 0x3d66a6b9d9e0a5bd),
+	(0x3fdc8ff7c79a8000, 0x3d5a21ac25d81ef3),
+	(0x3fdce1af0b860000, 0xbd48290905a86aa6),
+	(0x3fdd32fe7e010000, 0xbd542a9e21373414),
+	(0x3fdd83e7258a0000, 0x3d679f2828add176),
+	(0x3fddd46a04c20000, 0xbd6dafa08cecadb1),
+	(0x3fde24881a7c8000, 0xbd53d9e34270ba6b),
+	(0x3fde744261d68000, 0x3d3e1f8df68dbcf3),
+	(0x3fdec399d2468000, 0x3d49802eb9dca7e7),
+	(0x3fdf128f5faf0000, 0x3d3bb2cd720ec44c),
+	(0x3fdf6123fa700000, 0x3d645630a2b61e5b),
+	(0x3fdfaf588f790000, 0xbd49c24ca098362b),
+	(0x3fdffd2e08580000, 0xbd46cf54d05f9367),
+	(0x3fe02552a5a5c000, 0x3d60fec69c695d7f),
+	(0x3fe04bdf9da94000, 0xbd692d9a033eff75),
+	(0x3fe0723e5c1cc000, 0x3d6f404e57963891),
+	(0x3fe0986f4f574000, 0xbd55be8dc04ad601),
+	(0x3fe0be72e4254000, 0xbd657d49676844cc),
+	(0x3fe0e44985d1c000, 0x3d5917edd5cbbd2d),
+	(0x3fe109f39e2d4000, 0x3d592dfbc7d93617),
+	(0x3fe12f7195940000, 0xbd6043acfedce638),
+	(0x3fe154c3d2f4c000, 0x3d65e9a98f33a396),
+	(0x3fe179eabbd88000, 0x3d69a0bfc60e6fa0),
+	(0x3fe19ee6b467c000, 0x3d52dd98b97baef0),
+	(0x3fe1c3b81f714000, 0xbd3eda1b58389902),
+	(0x3fe1e85f5e704000, 0x3d1a07bd8b34be7c),
+	(0x3fe20cdcd192c000, 0xbd64926cafc2f08a),
+	(0x3fe23130d7bec000, 0xbd17afa4392f1ba7),
+	(0x3fe2555bce990000, 0xbd506987f78a4a5e),
+	(0x3fe2795e1289c000, 0xbd5dca290f81848d),
+	(0x3fe29d37fec2c000, 0xbd5eea6f465268b4),
+	(0x3fe2c0e9ed448000, 0x3d5d1772f5386374),
+	(0x3fe2e47436e40000, 0x3d334202a10c3491),
+	(0x3fe307d7334f0000, 0x3d60be1fb590a1f5),
+	(0x3fe32b1339120000, 0x3d6d71320556b67b),
+	(0x3fe34e289d9d0000, 0xbd6e2ce9146d277a),
+	(0x3fe37117b5474000, 0x3d4ed71774092113),
+	(0x3fe393e0d3564000, 0xbd65e6563bbd9fc9),
+	(0x3fe3b6844a000000, 0xbd3eea838909f3d3),
+	(0x3fe3d9026a714000, 0x3d66faa404263d0b),
+	(0x3fe3fb5b84d18000, 0xbd60bda4b162afa3),
+	(0x3fe41d8fe8468000, 0xbd5aa33736867a17),
+	(0x3fe43f9fe2f9c000, 0x3d5ccef4e4f736c2),
+	(0x3fe4618bc21c4000, 0x3d6ec27d0b7b37b3),
+	(0x3fe48353d1ea8000, 0x3d51bee7abd17660),
+	(0x3fe4a4f85db04000, 0xbd244fdd840b8591),
+	(0x3fe4c679afcd0000, 0xbd61c64e971322ce),
+	(0x3fe4e7d811b74000, 0x3d6bb09cb0985646),
+	(0x3fe50913cc018000, 0xbd6794b434c5a4f5),
+	(0x3fe52a2d265bc000, 0x3d46abb9df22bc57),
+	(0x3fe54b2467998000, 0x3d6497a915428b44),
+	(0x3fe56bf9d5b40000, 0xbd58cd7dc73bd194),
+	(0x3fe58cadb5cd8000, 0xbd49db3db43689b4),
+	(0x3fe5ad404c358000, 0x3d6f2cfb29aaa5f0),
+	(0x3fe5cdb1dc6c0000, 0x3d67648cf6e3c5d7),
+	(0x3fe5ee02a9240000, 0x3d667570d6095fd2),
+	(0x3fe60e32f4478000, 0x3d51b194f912b417),
+	(0x3fe62e42fefa4000, 0xbd48432a1b0e2634),
+]
+
 const desc32 : fltdesc(flt32, uint32, int32) = [
 	.explode = std.flt32explode,
 	.assem = std.flt32assem,
@@ -71,137 +340,7 @@
 	.log1pT1 = 0xbd782a03, /* Just smaller than e^(-1/16) - 1 ~= -0.0605869 */
 	.log1pT2 = 0x3d8415ac, /* Just larger than e^(1/16) - 1 ~= 0.06449445 */
 	.T3exp = 26, /* Beyond 2^T3exp, 1 + x rounds to x */
-	.C = [ /* Accurate representations for log(1 + j/2^7), all j */
-		(0000000000, 0000000000),
-		(0x3bff0000, 0x3429ac42),
-		(0x3c7e0000, 0x35a8b0fc),
-		(0x3cbdc000, 0x368d83eb),
-		(0x3cfc2000, 0xb6b278c4),
-		(0x3d1cf000, 0x3687b9ff),
-		(0x3d3ba000, 0x3631ec66),
-		(0x3d5a1000, 0x36dd7119),
-		(0x3d785000, 0x35c30046),
-		(0x3d8b2800, 0x365bba8e),
-		(0x3d9a1000, 0xb621a791),
-		(0x3da8d800, 0x34e7e0c3),
-		(0x3db78800, 0xb635d46a),
-		(0x3dc61800, 0x368bac63),
-		(0x3dd49000, 0x36da7496),
-		(0x3de2f000, 0x36a91eb8),
-		(0x3df13800, 0x34edc55e),
-		(0x3dff6800, 0xb6dd9c48),
-		(0x3e06bc00, 0xb44197b9),
-		(0x3e0db800, 0x36ab54be),
-		(0x3e14ac00, 0xb6b41f80),
-		(0x3e1b9000, 0xb4f7f85c),
-		(0x3e226800, 0x36adb32e),
-		(0x3e293800, 0xb650e2f2),
-		(0x3e2ff800, 0x36c1c29e),
-		(0x3e36b000, 0x35fe719d),
-		(0x3e3d5c00, 0x3590210e),
-		(0x3e43fc00, 0x36819483),
-		(0x3e4a9400, 0xb6958c2f),
-		(0x3e511c00, 0x36f07f8b),
-		(0x3e57a000, 0xb6dac5fd),
-		(0x3e5e1400, 0x354e85b2),
-		(0x3e648000, 0xb5838656),
-		(0x3e6ae000, 0x3685ad3f),
-		(0x3e713800, 0x356dc55e),
-		(0x3e778400, 0x36b72f71),
-		(0x3e7dc800, 0x36436af2),
-		(0x3e820200, 0xb6d35a59),
-		(0x3e851a00, 0xb6d8ec63),
-		(0x3e882c00, 0x363f9ae5),
-		(0x3e8b3a00, 0x36e55d5d),
-		(0x3e8e4400, 0x36c60b4d),
-		(0x3e914a00, 0x34fde7bd),
-		(0x3e944a00, 0x36d09ef4),
-		(0x3e974800, 0xb6ea28f7),
-		(0x3e9a3e00, 0x36ecd4c4),
-		(0x3e9d3200, 0x36455694),
-		(0x3ea02200, 0xb6779796),
-		(0x3ea30c00, 0x363c21c6),
-		(0x3ea5f200, 0x36fcabbc),
-		(0x3ea8d600, 0xb693c690),
-		(0x3eabb400, 0xb60e8baa),
-		(0x3eae8e00, 0xb51029fe),
-		(0x3eb16400, 0x353cae72),
-		(0x3eb43600, 0x3601e9b1),
-		(0x3eb70400, 0x366aa2ba),
-		(0x3eb9ce00, 0x36bfb5df),
-		(0x3ebc9600, 0xb6d50116),
-		(0x3ebf5800, 0xb5f88faa),
-		(0x3ec21600, 0x368ed0f4),
-		(0x3ec4d200, 0xb64793ec),
-		(0x3ec78800, 0x36f439b3),
-		(0x3eca3c00, 0x36a0e109),
-		(0x3eccec00, 0x36ac08bf),
-		(0x3ecf9a00, 0xb6e09a03),
-		(0x3ed24200, 0x3410e5bb),
-		(0x3ed4e800, 0xb69b2b30),
-		(0x3ed78a00, 0xb6b66dc4),
-		(0x3eda2800, 0xb6084337),
-		(0x3edcc200, 0x36c4b499),
-		(0x3edf5a00, 0x3659da72),
-		(0x3ee1ee00, 0x36bd3e6d),
-		(0x3ee48000, 0xb6038656),
-		(0x3ee70e00, 0xb687a3d0),
-		(0x3ee99800, 0xb4c0ff8a),
-		(0x3eec2000, 0xb6c6d3af),
-		(0x3eeea400, 0xb6afd9f2),
-		(0x3ef12400, 0x3601a7c7),
-		(0x3ef3a200, 0x351875a2),
-		(0x3ef61c00, 0x36ce9234),
-		(0x3ef89400, 0x3675faf0),
-		(0x3efb0a00, 0xb6e02c7f),
-		(0x3efd7a00, 0x36c47bc8),
-		(0x3effea00, 0xb68fbd40),
-		(0x3f012b00, 0xb6d5a5a3),
-		(0x3f025f00, 0xb444adb2),
-		(0x3f039200, 0xb551f190),
-		(0x3f04c300, 0x36f4f573),
-		(0x3f05f400, 0xb6d1bdad),
-		(0x3f072200, 0x36985d1d),
-		(0x3f085000, 0xb6c61d2b),
-		(0x3f097c00, 0xb6e6a6c1),
-		(0x3f0aa600, 0x35f4bd35),
-		(0x3f0bcf00, 0x36abbd8a),
-		(0x3f0cf700, 0x36568cf9),
-		(0x3f0e1e00, 0xb67c11d8),
-		(0x3f0f4300, 0xb4a18fbf),
-		(0x3f106700, 0xb5cb9b55),
-		(0x3f118a00, 0xb6f28414),
-		(0x3f12ab00, 0xb6062ce1),
-		(0x3f13cb00, 0xb576bb27),
-		(0x3f14ea00, 0xb68013d5),
-		(0x3f160700, 0x369ed449),
-		(0x3f172400, 0xb6bc91c0),
-		(0x3f183f00, 0xb68ccb0f),
-		(0x3f195900, 0xb6cc6ede),
-		(0x3f1a7100, 0x3689d9ce),
-		(0x3f1b8900, 0xb684ab8c),
-		(0x3f1c9f00, 0x34d3562a),
-		(0x3f1db400, 0x36094000),
-		(0x3f1ec800, 0x359a9c56),
-		(0x3f1fdb00, 0xb60f65d2),
-		(0x3f20ec00, 0x36fe8467),
-		(0x3f21fd00, 0xb368318d),
-		(0x3f230c00, 0x36bc21c6),
-		(0x3f241b00, 0xb6c2e157),
-		(0x3f252800, 0xb67449f8),
-		(0x3f263400, 0xb64a0662),
-		(0x3f273f00, 0xb67dc915),
-		(0x3f284900, 0xb6c33fe9),
-		(0x3f295100, 0x36d265bc),
-		(0x3f2a5900, 0x360cf333),
-		(0x3f2b6000, 0xb6454982),
-		(0x3f2c6500, 0x36db5cd8),
-		(0x3f2d6a00, 0x34186b3e),
-		(0x3f2e6e00, 0xb6e2393f),
-		(0x3f2f7000, 0x35aa4906),
-		(0x3f307200, 0xb6d0bb87),
-		(0x3f317200, 0x35bfbe8e), /* Note C[128] is log2 */
-	],
+	.C = accurate_logs32[:],
 	.Ai = [ 0x3daaaac2 ][:], /* Coefficients for log(1 + f/F) */
 	.Bi = [ /* Coefficients for log(1 + f) in terms of a = 2f/(2 + f) */
 		0x3daaaaa9,
@@ -230,137 +369,7 @@
 	.log1pT1 = 0xbfaf0540428fd5c4, /* Just smaller than e^(-1/16) - 1 ~= -0.0605869 */
 	.log1pT2 = 0x3fb082b577d34ed8, /* Just larger than e^(1/16) - 1 ~= 0.06449445 */
 	.T3exp = 55, /* Beyond 2^T3exp, 1 + x rounds to x */
-	.C = [
-		(000000000000000000, 000000000000000000),
-		(0x3f7fe02a6b200000, 0xbd6f30ee07912df9),
-		(0x3f8fc0a8b1000000, 0xbd5fe0e183092c59),
-		(0x3f97b91b07d80000, 0xbd62772ab6c0559c),
-		(0x3f9f829b0e780000, 0x3d2980267c7e09e4),
-		(0x3fa39e87ba000000, 0xbd642a056fea4dfd),
-		(0x3fa77458f6340000, 0xbd62303b9cb0d5e1),
-		(0x3fab42dd71180000, 0x3d671bec28d14c7e),
-		(0x3faf0a30c0100000, 0x3d662a6617cc9717),
-		(0x3fb16536eea40000, 0xbd60a3e2f3b47d18),
-		(0x3fb341d7961c0000, 0xbd4717b6b33e44f8),
-		(0x3fb51b073f060000, 0x3d383f69278e686a),
-		(0x3fb6f0d28ae60000, 0xbd62968c836cc8c2),
-		(0x3fb8c345d6320000, 0xbd5937c294d2f567),
-		(0x3fba926d3a4a0000, 0x3d6aac6ca17a4554),
-		(0x3fbc5e548f5c0000, 0xbd4c5e7514f4083f),
-		(0x3fbe27076e2a0000, 0x3d6e5cbd3d50fffc),
-		(0x3fbfec9131dc0000, 0xbd354555d1ae6607),
-		(0x3fc0d77e7cd10000, 0xbd6c69a65a23a170),
-		(0x3fc1b72ad52f0000, 0x3d69e80a41811a39),
-		(0x3fc29552f8200000, 0xbd35b967f4471dfc),
-		(0x3fc371fc201f0000, 0xbd6c22f10c9a4ea8),
-		(0x3fc44d2b6ccb0000, 0x3d6f4799f4f6543e),
-		(0x3fc526e5e3a20000, 0xbd62f21746ff8a47),
-		(0x3fc5ff3070a80000, 0xbd6b0b0de3077d7e),
-		(0x3fc6d60fe71a0000, 0xbd56f1b955c4d1da),
-		(0x3fc7ab8902110000, 0xbd537b720e4a694b),
-		(0x3fc87fa065210000, 0xbd5b77b7effb7f41),
-		(0x3fc9525a9cf40000, 0x3d65ad1d904c1d4e),
-		(0x3fca23bc1fe30000, 0xbd62a739b23b93e1),
-		(0x3fcaf3c94e810000, 0xbd600349cc67f9b2),
-		(0x3fcbc286742e0000, 0xbd6cca75818c5dbc),
-		(0x3fcc8ff7c79b0000, 0xbd697794f689f843),
-		(0x3fcd5c216b500000, 0xbd611ba91bbca682),
-		(0x3fce27076e2b0000, 0xbd3a342c2af0003c),
-		(0x3fcef0adcbdc0000, 0x3d664d948637950e),
-		(0x3fcfb9186d5e0000, 0x3d5f1546aaa3361c),
-		(0x3fd0402594b50000, 0xbd67df928ec217a5),
-		(0x3fd0a324e2738000, 0x3d50e35f73f7a018),
-		(0x3fd1058bf9ae8000, 0xbd6a9573b02faa5a),
-		(0x3fd1675cabab8000, 0x3d630701ce63eab9),
-		(0x3fd1c898c1698000, 0x3d59fafbc68e7540),
-		(0x3fd22941fbcf8000, 0xbd3a6976f5eb0963),
-		(0x3fd2895a13de8000, 0x3d3a8d7ad24c13f0),
-		(0x3fd2e8e2bae10000, 0x3d5d309c2cc91a85),
-		(0x3fd347dd9a988000, 0xbd25594dd4c58092),
-		(0x3fd3a64c55698000, 0xbd6d0b1c68651946),
-		(0x3fd4043086868000, 0x3d63f1de86093efa),
-		(0x3fd4618bc21c8000, 0xbd609ec17a426426),
-		(0x3fd4be5f95778000, 0xbd3d7c92cd9ad824),
-		(0x3fd51aad872e0000, 0xbd3f4bd8db0a7cc1),
-		(0x3fd5767717458000, 0xbd62c9d5b2a49af9),
-		(0x3fd5d1bdbf580000, 0x3d4394a11b1c1ee4),
-		(0x3fd62c82f2ba0000, 0xbd6c356848506ead),
-		(0x3fd686c81e9b0000, 0x3d54aec442be1015),
-		(0x3fd6e08eaa2b8000, 0x3d60f1c609c98c6c),
-		(0x3fd739d7f6bc0000, 0xbd67fcb18ed9d603),
-		(0x3fd792a55fdd8000, 0xbd6c2ec1f512dc03),
-		(0x3fd7eaf83b828000, 0x3d67e1b259d2f3da),
-		(0x3fd842d1da1e8000, 0x3d462e927628cbc2),
-		(0x3fd89a3386c18000, 0xbd6ed2a52c73bf78),
-		(0x3fd8f11e87368000, 0xbd5d3881e8962a96),
-		(0x3fd947941c210000, 0x3d56faba4cdd147d),
-		(0x3fd99d9581180000, 0xbd5f753456d113b8),
-		(0x3fd9f323ecbf8000, 0x3d584bf2b68d766f),
-		(0x3fda484090e58000, 0x3d6d8515fe535b87),
-		(0x3fda9cec9a9a0000, 0x3d40931a909fea5e),
-		(0x3fdaf12932478000, 0xbd3e53bb31eed7a9),
-		(0x3fdb44f77bcc8000, 0x3d4ec5197ddb55d3),
-		(0x3fdb985896930000, 0x3d50fb598fb14f89),
-		(0x3fdbeb4d9da70000, 0x3d5b7bf7861d37ac),
-		(0x3fdc3dd7a7cd8000, 0x3d66a6b9d9e0a5bd),
-		(0x3fdc8ff7c79a8000, 0x3d5a21ac25d81ef3),
-		(0x3fdce1af0b860000, 0xbd48290905a86aa6),
-		(0x3fdd32fe7e010000, 0xbd542a9e21373414),
-		(0x3fdd83e7258a0000, 0x3d679f2828add176),
-		(0x3fddd46a04c20000, 0xbd6dafa08cecadb1),
-		(0x3fde24881a7c8000, 0xbd53d9e34270ba6b),
-		(0x3fde744261d68000, 0x3d3e1f8df68dbcf3),
-		(0x3fdec399d2468000, 0x3d49802eb9dca7e7),
-		(0x3fdf128f5faf0000, 0x3d3bb2cd720ec44c),
-		(0x3fdf6123fa700000, 0x3d645630a2b61e5b),
-		(0x3fdfaf588f790000, 0xbd49c24ca098362b),
-		(0x3fdffd2e08580000, 0xbd46cf54d05f9367),
-		(0x3fe02552a5a5c000, 0x3d60fec69c695d7f),
-		(0x3fe04bdf9da94000, 0xbd692d9a033eff75),
-		(0x3fe0723e5c1cc000, 0x3d6f404e57963891),
-		(0x3fe0986f4f574000, 0xbd55be8dc04ad601),
-		(0x3fe0be72e4254000, 0xbd657d49676844cc),
-		(0x3fe0e44985d1c000, 0x3d5917edd5cbbd2d),
-		(0x3fe109f39e2d4000, 0x3d592dfbc7d93617),
-		(0x3fe12f7195940000, 0xbd6043acfedce638),
-		(0x3fe154c3d2f4c000, 0x3d65e9a98f33a396),
-		(0x3fe179eabbd88000, 0x3d69a0bfc60e6fa0),
-		(0x3fe19ee6b467c000, 0x3d52dd98b97baef0),
-		(0x3fe1c3b81f714000, 0xbd3eda1b58389902),
-		(0x3fe1e85f5e704000, 0x3d1a07bd8b34be7c),
-		(0x3fe20cdcd192c000, 0xbd64926cafc2f08a),
-		(0x3fe23130d7bec000, 0xbd17afa4392f1ba7),
-		(0x3fe2555bce990000, 0xbd506987f78a4a5e),
-		(0x3fe2795e1289c000, 0xbd5dca290f81848d),
-		(0x3fe29d37fec2c000, 0xbd5eea6f465268b4),
-		(0x3fe2c0e9ed448000, 0x3d5d1772f5386374),
-		(0x3fe2e47436e40000, 0x3d334202a10c3491),
-		(0x3fe307d7334f0000, 0x3d60be1fb590a1f5),
-		(0x3fe32b1339120000, 0x3d6d71320556b67b),
-		(0x3fe34e289d9d0000, 0xbd6e2ce9146d277a),
-		(0x3fe37117b5474000, 0x3d4ed71774092113),
-		(0x3fe393e0d3564000, 0xbd65e6563bbd9fc9),
-		(0x3fe3b6844a000000, 0xbd3eea838909f3d3),
-		(0x3fe3d9026a714000, 0x3d66faa404263d0b),
-		(0x3fe3fb5b84d18000, 0xbd60bda4b162afa3),
-		(0x3fe41d8fe8468000, 0xbd5aa33736867a17),
-		(0x3fe43f9fe2f9c000, 0x3d5ccef4e4f736c2),
-		(0x3fe4618bc21c4000, 0x3d6ec27d0b7b37b3),
-		(0x3fe48353d1ea8000, 0x3d51bee7abd17660),
-		(0x3fe4a4f85db04000, 0xbd244fdd840b8591),
-		(0x3fe4c679afcd0000, 0xbd61c64e971322ce),
-		(0x3fe4e7d811b74000, 0x3d6bb09cb0985646),
-		(0x3fe50913cc018000, 0xbd6794b434c5a4f5),
-		(0x3fe52a2d265bc000, 0x3d46abb9df22bc57),
-		(0x3fe54b2467998000, 0x3d6497a915428b44),
-		(0x3fe56bf9d5b40000, 0xbd58cd7dc73bd194),
-		(0x3fe58cadb5cd8000, 0xbd49db3db43689b4),
-		(0x3fe5ad404c358000, 0x3d6f2cfb29aaa5f0),
-		(0x3fe5cdb1dc6c0000, 0x3d67648cf6e3c5d7),
-		(0x3fe5ee02a9240000, 0x3d667570d6095fd2),
-		(0x3fe60e32f4478000, 0x3d51b194f912b417),
-		(0x3fe62e42fefa4000, 0xbd48432a1b0e2634),
-	],
+	.C = accurate_logs64[:],
 	.Ai = [
 		0x3fb5555555550286,
 		0x3f8999a0bc712416,
--- /dev/null
+++ b/lib/math/powr-impl.myr
@@ -1,0 +1,406 @@
+use std
+
+use "fpmath"
+use "log-impl"
+use "util"
+
+/*
+   This is an implementation of powr, not pow, so the special cases
+   are tailored more closely to the mathematical x^y = e^(y * log(x))
+   than to historical C implementations (pow was aligned to the C99
+   standard, which was aligned to codify existing practice).
+
+   Even then, some parts of the powr specification are unclear. For
+   example, IEEE 754-2008 does not specify what powr(infty, y) must
+   return when y is not 0.0 (an erratum was planned in 2010, but
+   does not appear to have been released as of 2018).
+
+   As a note: unlike many other functions in this library, there
+   has been no serious analysis of the accuracy and speed of this
+   particular implementation. Interested observers wishing to improve
+   this library will probably find this file goldmine of mistakes,
+   both theoretical and practical.
+ */
+pkg math =
+	pkglocal const powr32 : (x : flt32, y : flt32 -> flt32)
+	pkglocal const powr64 : (x : flt64, y : flt64 -> flt64)
+;;
+
+type fltdesc(@f, @u, @i) = struct
+	explode : (f : @f -> (bool, @i, @u))
+	assem : (n : bool, e : @i, s : @u -> @f)
+	tobits : (f : @f -> @u)
+	frombits : (u : @u -> @f)
+	nan : @u
+	inf : @u
+	expmask : @u
+	precision : @u
+	emax : @i
+	emin : @i
+	sgnmask : @u
+	sig8mask : @u
+	sig8last : @u
+	split_prec_mask : @u
+	split_prec_mask2 : @u
+	C : (@u, @u)[:]
+	eps_inf_border : @u
+	eps_zero_border : @u
+	exp_inf_border : @u
+	exp_zero_border : @u
+	exp_subnormal_border : @u
+	itercount : @u
+;;
+
+const desc32 : fltdesc(flt32, uint32, int32) =  [
+	.explode = std.flt32explode,
+	.assem = std.flt32assem,
+	.tobits = std.flt32bits,
+	.frombits = std.flt32frombits,
+	.nan = 0x7fc00000,
+	.inf = 0x7f800000,
+	.expmask = 0x7f800000, /* mask to detect inf or NaN (inf, repeated for clarity) */
+	.precision = 24,
+	.emax = 127,
+	.emin = -126,
+	.sgnmask = 1 << 31,
+	.sig8mask = 0xffff0000, /* Mask to get 8 significant bits */
+	.sig8last = 16, /* Last bit kept when masking */
+	.split_prec_mask = 0xffff0000, /* 16 trailing zeros */
+	.split_prec_mask2 = 0xfffff000, /* 12 trailing zeros */
+	.C = accurate_logs32[0:130], /* See log-impl.myr */
+	.eps_inf_border = 0x4eb00f34, /* maximal y st. (1.00..1)^y < oo */
+	.eps_zero_border = 0x4ecff1b4, /* minimal y st. (0.99..9)^y > 0 */
+	.exp_inf_border = 0x42b17218, /* maximal y such that e^y < oo */
+	.exp_zero_border = 0xc2cff1b4, /* minimal y such that e^y > 0 */
+	.exp_subnormal_border = 0xc2aeac50, /* minimal y such that e^y is normal */
+	.itercount = 4, /* How many iterations of Taylor series for (1 + f)^y' */
+]
+
+const desc64 : fltdesc(flt64, uint64, int64) =  [
+	.explode = std.flt64explode,
+	.assem = std.flt64assem,
+	.tobits = std.flt64bits,
+	.frombits = std.flt64frombits,
+	.nan = 0x7ff8000000000000,
+	.inf = 0x7ff0000000000000,
+	.expmask = 0x7ff0000000000000,
+	.precision = 53,
+	.emax = 1023,
+	.emin = -1022,
+	.sgnmask = 1 << 63,
+	.sig8mask = 0xffffe00000000000, /* Mask to get 8 significant bits */
+	.sig8last = 45, /* Last bit kept when masking */
+	.split_prec_mask = 0xffffff0000000000, /* 40 trailing zeroes */
+	.split_prec_mask2 = 0xfffffffffffc0000, /* 18 trailing zeroes */
+	.C = accurate_logs64[0:130], /* See log-impl.myr */
+	.eps_inf_border = 0x43d628b76e3a7b61, /* maximal y st. (1.00..1)^y < oo */
+	.eps_zero_border = 0x43d74e9c65eceee0, /*  minimal y st. (0.99..9)^y > 0 */
+	.exp_inf_border = 0x40862e42fefa39ef, /* maximal y such that e^y < oo */
+	.exp_zero_border = 0xc0874910d52d3052, /* minimal y such that e^y > 0 */
+	.exp_subnormal_border = 0xc086232bdd7abcd2, /* minimal y such that e^y is normal */
+	.itercount = 8,
+]
+
+const powr32 = {x : flt32, y : flt32
+	-> powrgen(x, y, desc32)
+}
+
+const powr64 = {x : flt64, y : flt64
+	-> powrgen(x, y, desc64)
+}
+
+generic powrgen = {x : @f, y : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i
+	var xb, yb
+	xb = d.tobits(x)
+	yb = d.tobits(y)
+
+	var xn : bool, xe : @i, xs : @u
+	var yn : bool, ye : @i, ys : @u
+	(xn, xe, xs) = d.explode(x)
+	(yn, ye, ys) = d.explode(y)
+
+	/*
+	   Special cases. Note we do not follow IEEE exceptions.
+	 */
+	if std.isnan(x) || std.isnan(y)
+		/* Propagate NaN */
+		-> d.frombits(d.nan)
+	elif (xb & ~d.sgnmask == 0)
+		if (yb & ~d.sgnmask == 0)
+			/* 0^0 is undefined. */
+			-> d.frombits(d.nan)
+		elif yn
+			/* 0^(< 0) is infinity */
+			-> d.frombits(d.inf)
+		else
+			/* otherwise, 0^y = 0. */
+			-> (0.0 : @f)
+		;;
+	elif xn
+		/*
+		   (< 0)^(anything) is undefined. This comes from
+		   thinking of floating-point numbers as representing
+		   small ranges of real numbers. If you really want
+		   to compute (-1.23)^5, use pown.
+		 */
+		-> d.frombits(d.nan)
+	elif (xb & ~d.sgnmask == d.inf)
+		if (yb & ~d.sgnmask == 0)
+			/* oo^0 is undefined */
+			-> d.frombits(d.nan)
+		elif yn
+			/* +/-oo^(< 0) is +/-0 */
+			-> d.assem(xn, 0, 0)
+		elif xn
+			/* (-oo)^(anything) is undefined */
+			-> d.frombits(d.nan)
+		else
+			/* oo^(> 0) is oo */
+			-> d.frombits(d.inf)
+		;;
+	elif std.eq(y, (1.0 : @f))
+		/* x^1 = x */
+		-> x
+	elif yb & ~d.sgnmask == 0
+		/* (finite, positive)^0 = 1 */
+		-> (1.0 : @f)
+	elif std.eq(x, (1.0 : @f))
+		if yb & ~d.sgnmask == d.inf
+			/* 1^oo is undefined */
+			-> d.frombits(d.nan)
+		else
+			/* 1^(finite, positive) = 1 */
+			-> (1.0 : @f)
+		;;
+	elif yb & ~d.sgnmask == d.inf
+		if xe < 0
+			/* (0 < x < 1)^oo = 0 */
+			-> (0.0 : @f)
+		else
+			/* (x > 1)^oo = oo */
+			-> d.frombits(d.inf)
+		;;
+	;;
+
+	/* Normalize x and y */
+	if xe < d.emin
+		var first_1 = find_first1_64((xs : uint64), (d.precision : int64))
+		var offset = (d.precision : @u) - 1 - (first_1 : @u)
+		xs = xs << offset
+		xe = d.emin - offset
+	;;
+
+	if ye < d.emin
+		var first_1 = find_first1_64((ys : uint64), (d.precision : int64))
+		var offset = (d.precision : @u) - 1 - (first_1 : @u)
+		ys = ys << offset
+		ye = d.emin - offset
+	;;
+
+	/*
+           Split x into 2^N * F * (1 + f), with F = 1 + j/128 (some
+           j) and f tiny. Compute F naively by truncation. Compute
+           f via f = (x' - 1 - F)/(1 + F), where 1/(1 + F) is
+           precomputed and x' is x/2^N. 128 is chosen so that we
+           can borrow some constants from log-impl.myr.
+
+           [Tan90] hints at a method of computing x^y which may be
+           comparable to this approach, but which is unfortunately
+           has not been elaborated on (as far as I can discover).
+	 */
+	var N = xe
+	var j, F, Fn, Fe, Fs
+	var xprime = d.assem(false, 0, xs)
+
+	if need_round_away(0, (xs : uint64), (d.sig8last : int64))
+		F = d.frombits((d.tobits(xprime) & d.sig8mask) + (1 << d.sig8last))
+	else
+		F = d.frombits(d.tobits(xprime) & d.sig8mask)
+	;;
+
+	(Fn, Fe, Fs) = d.explode(F)
+
+	if Fe != 0
+		j = 128
+	else
+		j = 0x7f & ((d.sig8mask & Fs) >> d.sig8last)
+	;;
+
+	var f = (xprime - F)/F
+
+	/*
+	   y could actually be above integer infinity, in which
+	   case x^y is most certainly infinity of 0. More importantly,
+	   we can't safely compute M (below).
+	 */
+	if x > (1.0 : @f)
+		if y > d.frombits(d.eps_inf_border)
+			-> d.frombits(d.inf)
+		elif -y > d.frombits(d.eps_inf_border)
+			-> (0.0 : @f)
+		;;
+	elif x < (1.0 : @f)
+		if y > d.frombits(d.eps_zero_border) && x < (1.0 : @f)
+			-> (0.0 : @f)
+		elif -y > d.frombits(d.eps_zero_border) && x < (1.0 : @f)
+			-> d.frombits(d.inf)
+		;;
+	;;
+
+	/* Split y into M + y', with |y'| <= 0.5 and M an integer */
+	var M = floor(y)
+	var yprime = y - M
+	if yprime > (0.5 : @f)
+		M += (1.0 : @f)
+		yprime = y - M
+	elif yprime < (-0.5 : @f)
+		M -= (1.0: @f)
+		yprime = y - M
+	;;
+
+	/*
+	   We'll multiply y' by log(2) and try to keep extra
+	   precision, so we need to split y'. Since the high word
+	   of C has 24 - 10 = 14 significant bits (53 - 16 = 37 in
+	   flt64 case), we ensure 15 (39) trailing zeroes in
+	   yprime_hi.  (We also need this for y'*N, M, &c).
+	 */
+	var yprime_hi = d.frombits(d.tobits(yprime) & d.split_prec_mask)
+	var yprime_lo = yprime - yprime_hi
+	var yprimeN_hi = d.frombits(d.tobits((N : @f) * yprime) & d.split_prec_mask)
+	var yprimeN_lo = fma((N : @f),  yprime, -yprimeN_hi)
+	var M_hi = d.frombits(d.tobits(M) & d.split_prec_mask)
+	var M_lo = M - M_hi
+
+	/* 
+	   At this point, we've built out
+	   
+	       x^y = [ 2^N * F * (1 + f) ]^(M + y')
+	
+	   where N, M are integers, F is well-known, and f, y' are
+	   tiny. So we can get to computing
+
+	        /-1-\     /-------------------2--------------------------\     /-3--\
+	       2^(N*M) * exp(log(F)*y' + log2*N*y' + log(F)*M + M*log(1+f)) * (1+f)^y'
+
+	   where 1 can be handled by scale2, 2 we can mostly fake
+	   by sticking high-precision values for log(F) and log(2)
+	   through exp(), and 3 is composed of small numbers,
+	   therefore can be reasonably approximated by a Taylor
+	   expansion.
+	 */
+
+	/* t2 */
+	var log2_lo, log2_hi, Cu_hi, Cu_lo
+	(log2_hi, log2_lo) = d.C[128]
+	(Cu_hi, Cu_lo) = d.C[j]
+
+	var es : @f[20]
+	std.slfill(es[:], (0.0 : @f))
+
+	/* log(F) * y' */
+	es[0] = d.frombits(Cu_hi) * yprime_hi
+	es[1] = d.frombits(Cu_lo) * yprime_hi
+	es[2] = d.frombits(Cu_hi) * yprime_lo
+	es[3] = d.frombits(Cu_lo) * yprime_lo
+
+	/* log(2) * N * y' */
+	es[4] = d.frombits(log2_hi) * yprimeN_hi
+	es[5] = d.frombits(log2_lo) * yprimeN_hi
+	es[6] = d.frombits(log2_hi) * yprimeN_lo
+	es[7] = d.frombits(log2_lo) * yprimeN_lo
+
+	/* log(F) * M */
+	es[8] = d.frombits(Cu_hi) * M_hi
+	es[9] = d.frombits(Cu_lo) * M_hi
+	es[10] = d.frombits(Cu_hi) * M_lo
+	es[11] = d.frombits(Cu_lo) * M_lo
+
+	/* log(1 + f) * M */
+	var lf = log1p(f)
+	var lf_hi = d.frombits(d.tobits(lf) & d.split_prec_mask)
+	var lf_lo = lf - lf_hi
+	es[12] = lf_hi * M_hi
+	es[13] = lf_lo * M_hi
+	es[14] = lf_hi * M_lo
+	es[15] = lf_lo * M_lo
+
+	/*
+	   The correct way to handle this would be to compare
+	   magnitudes of eis and parenthesize the additions correctly.
+	   We take the cheap way out.
+	 */
+	var exp_hi = priest_sum(es[0:16])
+
+	/*
+	   We would like to just compute exp(exp_hi) * exp(exp_lo).
+	   However, if that takes us into subnormal territory, yet
+	   N * M is large, that will throw away a few bits of
+	   information. We can correct for this by adding in a few
+	   copies of P*log(2), then subtract off P when we compute
+	   scale2() at the end.
+
+	   We also have to be careful that P doesn't have too many
+	   significant bits, otherwise we throw away some information
+	   of log2_hi.
+	 */
+	var P = -rn(exp_hi / d.frombits(log2_hi))
+	var P_f = (P : @f)
+	P_f = d.frombits(d.tobits(P_f) & d.split_prec_mask2)
+	P = rn(P_f)
+
+	es[16] = P_f * d.frombits(log2_hi)
+	es[17] = P_f * d.frombits(log2_lo)
+	exp_hi = priest_sum(es[0:18])
+	es[18] = -exp_hi
+	var exp_lo = priest_sum(es[0:19])
+
+
+	var t2 = exp(exp_hi) * exp(exp_lo)
+
+	/*
+	   t3: Abbreviated Taylor expansion for (1 + f)^y' - 1.
+	   Since f is on the order of 2^-7 (and y' is on the order
+	   of 2^-1), we need to go up to f^3 for single-precision,
+	   and f^7 for double. We can then compute (1 + t3) * t2
+
+	   The expansion is \Sum_{k=1}^{\infty} {y' \choose k} x^k
+	 */
+	var terms : @f[10] = [
+		(0.0 : @f),  (0.0 : @f),  (0.0 : @f),  (0.0 : @f),  (0.0 : @f),
+		(0.0 : @f),  (0.0 : @f),  (0.0 : @f),  (0.0 : @f),  (0.0 : @f),
+	]
+	var current = (1.0 : @f)
+	for var j = 0; j <= d.itercount; ++j
+		current = current * f * (yprime - (j : @f)) / ((j : @f) + (1.0 : @f))
+		terms[j] = current
+	;;
+	var t3 = priest_sum(terms[0:d.itercount + 1])
+
+	var total_exp_f = (N : @f) * M - (P : @f)
+	if total_exp_f > ((d.emax - d.emin + d.precision + 1) : @f)
+		-> d.frombits(d.inf)
+	elif total_exp_f < -((d.emax - d.emin + d.precision + 1) : @f)
+		-> (0.0 : @f)
+	;;
+
+	/*
+	   Pull t2's exponent out so that we don't hit subnormal
+	   calculation with the t3 multiplication
+	 */
+	var t2n, t2e, t2s
+	(t2n, t2e, t2s) = d.explode(t2)
+
+	if t2e < d.emin
+		var t2_first_1 = find_first1_64((t2s : uint64), (d.precision : int64))
+		var t2_offset = (d.precision : @u) - 1 - (t2_first_1 : @u)
+		t2s = t2s << t2_offset
+		t2e = d.emin - (t2_offset : @i)
+	;;
+
+	t2 = d.assem(t2n, 0, t2s)
+	P -= t2e
+
+	var base = fma(t2, t3, t2)
+	-> scale2(base, N * rn(M) - P)
+}
--- a/lib/math/references
+++ b/lib/math/references
@@ -6,6 +6,10 @@
 Science 351 (1 2006), pp. 101–110. doi:
 https://doi.org/10.1016/j.tcs.2005.09.056.
 
+[Mar00]
+Peter Markstein. IA-64 and elementary functions : speed and precision.
+Upper Saddle River, NJ: Prentice Hall, 2000. isbn: 9780130183484.
+
 [Mul+10]
 Jean-Michel Muller et al. Handbook of floating-point arithmetic.
 Boston: Birkhäuser, 2010. isbn: 9780817647049.
--- a/lib/math/scale2-impl.myr
+++ b/lib/math/scale2-impl.myr
@@ -41,8 +41,11 @@
 				sprime++
 			;;
 			eprime = emin - 1
+		elif e + m < emin - p - 2
+			sprime = 0
+			eprime = emin - 1
 		elif e + m < emin
-			sprime = s >> (emin - m - e)
+			sprime = s >> ((emin - m - e) : @u)
 			if need_round_away(0, (s : uint64), ((emin - m - e) : int64))
 				sprime++
 			;;
--- a/lib/math/sqrt-impl.myr
+++ b/lib/math/sqrt-impl.myr
@@ -38,7 +38,7 @@
 
    In the flt64 case, we need only one more iteration.
  */
-const ab32 : (uint32, uint32)[:] = [
+const ab32 : (uint32, uint32)[7] = [
 	(0x3f800000, 0x3f800000), /* Nothing should ever get normalized to < 1.0 */
 	(0x3fa66666, 0x3f6f30ae), /* [1.0,  1.3 ) -> 0.9343365431 */
 	(0x3fd9999a, 0x3f5173ca), /* [1.3,  1.7 ) -> 0.8181730509 */
@@ -46,9 +46,9 @@
 	(0x40333333, 0x3f215342), /* [2.25, 2.8 ) -> 0.6301766634 */
 	(0x4059999a, 0x3f118e0e), /* [2.8,  3.4 ) -> 0.5685738325 */
 	(0x40800000, 0x3f053049), /* [3.4,  4.0 ) -> 0.520268023  */
-][:]
+]
 
-const ab64 : (uint64, uint64)[:] = [
+const ab64 : (uint64, uint64)[8] = [
 	(0x3ff0000000000000, 0x3ff0000000000000), /* < 1.0 */
 	(0x3ff3333333333333, 0x3fee892ce1608cbc), /* [1.0,  1.2)  -> 0.954245033445111356940060431953 */
 	(0x3ff6666666666666, 0x3fec1513a2184094), /* [1.2,  1.4)  -> 0.877572838393478438234751592972 */
@@ -57,7 +57,7 @@
 	(0x400599999999999a, 0x3fe47717c17cd34f), /* [2.2,  2.7)  -> 0.639537694840876969060161627567 */
 	(0x400b333333333333, 0x3fe258df212a8e9a), /* [2.7,  3.4)  -> 0.573348583963212421465982515656 */
 	(0x4010000000000000, 0x3fe0a5989f2dc59a), /* [3.4,  4.0)  -> 0.520214377304159869552790951275 */
-][:]
+]
 
 const desc32 : fltdesc(flt32, uint32, int32) =  [
 	.explode = std.flt32explode,
@@ -70,7 +70,7 @@
 	.emax = 128,
 	.normmask = 1 << 23,
 	.sgnmask = 1 << 31,
-	.ab = ab32,
+	.ab = ab32[0:7],
 	.iterlim = 3,
 ]
 
@@ -85,7 +85,7 @@
 	.emax = 1024,
 	.normmask = 1 << 52,
 	.sgnmask = 1 << 63,
-	.ab = ab64,
+	.ab = ab64[0:8],
 	.iterlim = 4,
 ]
 
@@ -174,11 +174,11 @@
 		var r_plus_ulp : @f = d.frombits(d.tobits(r) + 1)
 		var r_minus_ulp : @f = d.frombits(d.tobits(r) - 1)
 
-		var delta_1 = d.fma(r, r_minus_ulp, -1.0 * f)
+		var delta_1 = d.fma(r, r_minus_ulp, -1.0 * x)
 		if d.tobits(delta_1) & d.sgnmask == 0
 			r = r_minus_ulp
 		else
-			var delta_2 = d.fma(r, r_plus_ulp, -1.0 * f)
+			var delta_2 = d.fma(r, r_plus_ulp, -1.0 * x)
 			if d.tobits(delta_2) & d.sgnmask != 0
 				r = r_plus_ulp
 			else
--- /dev/null
+++ b/lib/math/test/powr-impl.myr
@@ -1,0 +1,76 @@
+use std
+use math
+use testr
+
+const main = {
+	math.fptrap(false)
+	testr.run([
+		[.name="powr-01", .fn = powr01],
+		[.name="powr-02", .fn = powr02],
+		[.name="powr-03", .fn = powr03],
+	][:])
+}
+
+const powr01 = {c
+	var inputs : (uint32, uint32, uint32)[:] = [
+		(0x08a38749, 0x2ffb67c0, 0x3f7fffff),
+		(0x01433ed5, 0x367caeda, 0x3f7feaba),
+		(0x7112fd5b, 0x7509b252, 0x7f800000),
+		(0x22b5f461, 0xc7335035, 0x7f800000),
+		(0x29529847, 0x43c6b361, 0x00000000),
+		(0x3fc1cc03, 0x64eb4c95, 0x7f800000),
+		(0x653f944a, 0xbf7c2388, 0x1a3c784b),
+		(0x545ba67c, 0xc0c7e947, 0x00000000),
+		(0x3fca6b0d, 0x44ff18e0, 0x7f800000),
+		(0x3f74c7a7, 0x44feae20, 0x000265c6),
+		(0x3f7ebd6c, 0xc5587884, 0x4bc9ab07),
+	][:]
+
+	for (x, y, z) : inputs
+		var xf : flt32 = std.flt32frombits(x)
+		var yf : flt32 = std.flt32frombits(y)
+		var zf : flt32 = std.flt32frombits(z)
+		var rf = math.powr(xf, yf)
+		testr.check(c, rf == zf,
+			"powr(0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
+			x, y, z, std.flt32bits(rf))
+	;;
+}
+
+const powr02 = {c
+	var inputs : (uint64, uint64, uint64)[:] = [
+		(0x0000000000000000, 0x0000000000000000, 0x0000000000000000),
+	][:]
+
+	for (x, y, z) : inputs
+		var xf : flt64 = std.flt64frombits(x)
+		var yf : flt64 = std.flt64frombits(y)
+		var zf : flt64 = std.flt64frombits(z)
+		var rf = math.powr(xf, yf)
+		testr.check(c, rf == zf,
+			"powr(0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+			x, y, z, std.flt64bits(rf))
+	;;
+}
+
+const powr03 = {c
+	var inputs : (uint32, uint32, uint32, uint32)[:] = [
+		(0x1bd2244e, 0x3a647973, 0x3f7535a1, 0x3f7535a0),
+		(0x3f264a46, 0x423c927a, 0x30c9b8d3, 0x30c9b8d4),
+		(0x61fb73d0, 0xbfd2666c, 0x06c539f6, 0x06c539f7),
+		(0x3bbd11f6, 0x3cc159b1, 0x3f62ac1b, 0x3f62ac1a),
+		(0x3f7ca5b7, 0xc309857a, 0x40c41bbf, 0x40c41bc0),
+		(0x3f6a1a65, 0x43e16065, 0x226731e2, 0x226731e3),
+	][:]
+
+	for (x, y, z_perfect, z_accepted) : inputs
+		var xf : flt32 = std.flt32frombits(x)
+		var yf : flt32 = std.flt32frombits(y)
+		var zf_perfect : flt32 = std.flt32frombits(z_perfect)
+		var zf_accepted : flt32 = std.flt32frombits(z_accepted)
+		var rf = math.powr(xf, yf)
+		testr.check(c, rf == zf_perfect || rf == zf_accepted,
+			"powr(0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0}) should be 0x{b=16,w=8,p=0}, will also accept 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
+			x, y, z_perfect, z_accepted, std.flt32bits(rf))
+	;;
+}
--- a/lib/math/test/scale2-impl.myr
+++ b/lib/math/test/scale2-impl.myr
@@ -13,6 +13,7 @@
 
 const scale201 = {c
 	var inputsf : (flt32, int32, flt32)[:] = [
+		(0.000000011971715, -246, 0.0),
 		(0.0, 1, 0.0),
 		(-0.0, 2, -0.0),
 		(1.0, 3, 8.0),
@@ -25,7 +26,9 @@
 	][:]
 
 	for (f, m, g) : inputsf
-		testr.eq(c, math.scale2(f, m), g)
+		var r = math.scale2(f, m)
+		testr.check(c, r == g, "scale2(0x{w=8,b=16,p=0}, {}) should be 0x{w=8,b=16,p=0}, was 0x{w=8,b=16,p=0}",
+		std.flt32bits(f), m, std.flt32bits(g), std.flt32bits(r))
 	;;
 }
 
--- a/lib/math/trunc-impl.myr
+++ b/lib/math/trunc-impl.myr
@@ -33,10 +33,10 @@
 	if n
 		var fractional_mask = Flt32SigMask >> (e : uint32)
 		if s & fractional_mask == 0
-			-> f
+			-> x
 		else
 			/* Turns out the packing of exp and sig is useful */
-			var u : uint32 = std.flt32bits(f) & ~fractional_mask
+			var u : uint32 = std.flt32bits(x) & ~fractional_mask
 			u += ((1 << 23) >> (e : uint32))
 			-> std.flt32frombits(u)
 		;;