ref: a33e94a67326d7774e7373b83597d84ee1eb347b
parent: 7c4d2abbe14d3d0e9d2f5872e0964677c5dc783d
author: S. Gilles <[email protected]>
date: Wed Mar 21 15:17:41 EDT 2018
Replace fma32 with simpler, cleaner algorithm from musl
--- a/lib/math/fpmath-fma-impl.myr
+++ b/lib/math/fpmath-fma-impl.myr
@@ -28,34 +28,73 @@
;;
var r : flt64 = prod + zd
+ var rn, re, rs
+ (rn, re, rs) = std.flt64explode(r)
- var zn, ze, zs
- (zn, ze, zs) = std.flt32explode(z)
- if ze == -127
- ze = -126
+ /*
+ At this point, r is probably the correct answer. The
+ only issue is that if truncating r to a flt32 causes
+ rounding-to-even, and it was obtained by rounding in the
+ first place, direction, then we'd be over-rounding. The
+ only way this could possibly be a problem is if the
+ product step rounded to the halfway point (a 100...0,
+ with the 1 just outside truncation range).
+ */
+ if re == 1024 || rs & 0x1fffffff != 0x10000000
+ -> flt32fromflt64(r)
;;
- var beyond : int64 = 0
- if pe - 52 > (ze : int64) - 23
- /*
- Identify all the bits of z that were too far
- away to be added into the product. If there are
- any, they will be used later to influence rounding
- away from the halfway mark.
- */
- var shift = std.min((pe - 52) - ((ze : int64) - 23) - 1, 64)
- var zs1 : uint64 = (zs : uint64)
- if zs1 & shr((-1 : uint64), 64 - shift) != 0
- if zn == pn
- beyond = 1
- else
- beyond = -1
- ;;
+ /* We can check if rounding was performed by undoing */
+ if flt32fromflt64(r - prod) == z
+ -> flt32fromflt64(r)
+ ;;
+
+ /*
+ At this point, there's definitely about to be a rounding
+ error. To figure out what to do, compute prod + z with
+ round-to-zero. If we get r again, then it's okay to round
+ r upwards, because it hasn't been rounded away from zero
+ yet and we allow ourselves one such rounding.
+ */
+ var zn, ze, zs
+ (zn, ze, zs) = std.flt64explode(zd)
+ if ze == -1023
+ ze = -1022
+ ;;
+ var rtn, rte, rts
+ if pe >= ze && pn == zn
+ (rtn, rte, rts) = (pn, pe, ps)
+ rts += shr(zs, pe - ze)
+ elif pe > ze || (pe == ze && ps > zs)
+ (rtn, rte, rts) = (pn, pe, ps)
+ rts -= shr(zs, pe - ze)
+ if shr((-1 : uint64), 64 - std.min(64, (pe - ze))) & zs != 0
+ rts--
;;
+ elif pe < ze && pn == zn
+ (rtn, rte, rts) = (zn, ze, zs)
+ rts += shr(ps, ze - pe)
+ else
+ (rtn, rte, rts) = (zn, ze, zs)
+ rts -= shr(ps, ze - pe)
+ if shr((-1 : uint64), 64 - std.min(64, (ze - pe))) & ps != 0
+ rts--
+ ;;
;;
- var r32 : flt32 = flt32fromflt64(r, beyond)
- -> r32
+ if rts & (1 << 53) != 0
+ rts >>= 1
+ rte++
+ ;;
+
+ if rn == rtn && rs == rts && re == rte
+ rts++
+ if rts & (1 << 53) != 0
+ rts >>= 1
+ rte++
+ ;;
+ ;;
+ -> flt32fromflt64(std.flt64assem(rtn, rte, rts))
}
const flt64fromflt32 = {f : flt32
@@ -84,7 +123,7 @@
-> std.flt64assem(n, xe, xs << (52 - 23))
}
-const flt32fromflt64 = {f : flt64, beyond : int64
+const flt32fromflt64 = {f : flt64
var n : bool, e : int64, s : uint64
(n, e, s) = std.flt64explode(f)
var ts : uint32
@@ -103,7 +142,7 @@
if e >= -126
/* normal */
ts = ((s >> (52 - 23)) : uint32)
- if need_round_away(0, s, 52 - 23, beyond)
+ if need_round_away(0, s, 52 - 23)
ts++
if ts & (1 << 24) != 0
ts >>= 1
@@ -123,7 +162,7 @@
var shift : int64 = (52 - 23) + (-126 - e)
var ts1 = shr(s, shift)
ts = (ts1 : uint32)
- if need_round_away(0, s, shift, beyond)
+ if need_round_away(0, s, shift)
ts++
;;
-> std.flt32assem(n, te, ts)
@@ -283,7 +322,7 @@
res_s |= shl(res_l, -12 + (res_lastbit_e + 64) - (-1022))
;;
- if need_round_away(res_h, res_l, res_first1 + (-1074 - res_firstbit_e), 0)
+ if need_round_away(res_h, res_l, res_first1 + (-1074 - res_firstbit_e))
res_s++
;;
@@ -306,13 +345,13 @@
if res_first1 - 52 >= 64
res_s = shr(res_h, (res_first1 : int64) - 64 - 52)
- if need_round_away(res_h, res_l, res_first1 - 52, 0)
+ if need_round_away(res_h, res_l, res_first1 - 52)
res_s++
;;
elif res_first1 - 52 >= 0
res_s = shl(res_h, 64 - (res_first1 - 52))
res_s |= shr(res_l, res_first1 - 52)
- if need_round_away(res_h, res_l, res_first1 - 52, 0)
+ if need_round_away(res_h, res_l, res_first1 - 52)
res_s++
;;
else
@@ -484,14 +523,10 @@
- following bitpos_last is a 1, then a zero sequence, and the
round would be to even
-
- The beyond parameter indicates whether there is lingering
- addition/subtraction past the range of l. This adds a bit more
- information about rounding before we hit round-to-even.
*/
-const need_round_away = {h : uint64, l : uint64, bitpos_last : int64, beyond : int64
+const need_round_away = {h : uint64, l : uint64, bitpos_last : int64
var first_omitted_is_1 = false
- var nonzero_beyond = beyond > 0
+ var nonzero_beyond = false
if bitpos_last > 64
first_omitted_is_1 = h & shl(1, bitpos_last - 1 - 64) != 0
nonzero_beyond = nonzero_beyond || h & shr((-1 : uint64), 2 + 64 - (bitpos_last - 64)) != 0
@@ -517,5 +552,5 @@
hl_is_odd = l & shl(1, bitpos_last) != 0
;;
- -> hl_is_odd && (beyond >= 0)
+ -> hl_is_odd
}