ref: 767534bdbac53853d443aaf095814c687fb5f486
dir: /lib/math/powr-impl.myr/
use std use "fpmath" use "impls" use "log-impl" use "log-overkill" use "util" /* This is an implementation of powr, not pow, so the special cases are tailored more closely to the mathematical x^y = e^(y * log(x)) than to historical C implementations (pow was aligned to the C99 standard, which was aligned to codify existing practice). Even then, some parts of the powr specification are unclear. For example, IEEE 754-2008 does not specify what powr(infty, y) must return when y is not 0.0 (an erratum was planned in 2010, but does not appear to have been released as of 2018). */ pkg math = pkglocal const powr32 : (x : flt32, y : flt32 -> flt32) pkglocal const powr64 : (x : flt64, y : flt64 -> flt64) ;; type fltdesc(@f, @u, @i) = struct explode : (f : @f -> (bool, @i, @u)) assem : (n : bool, e : @i, s : @u -> @f) tobits : (f : @f -> @u) frombits : (u : @u -> @f) nan : @u inf : @u expmask : @u precision : @u emax : @i emin : @i sgnmask : @u log_overkill : (x : @f -> (@f, @f)) fma : (x : @f, y : @f, z : @f -> @f) split_mul : (x_h : @f, x_l : @f, y_h : @f, y_l : @f -> (@f, @f)) exp_zero_border : @u ;; const desc32 : fltdesc(flt32, uint32, int32) = [ .explode = std.flt32explode, .assem = std.flt32assem, .tobits = std.flt32bits, .frombits = std.flt32frombits, .nan = 0x7fc00000, .inf = 0x7f800000, .expmask = 0x7f800000, /* mask to detect inf or NaN (inf, repeated for clarity) */ .precision = 24, .emax = 127, .emin = -126, .sgnmask = 1 << 31, .log_overkill = logoverkill32, .fma = fma32, .split_mul = split_mul32, .exp_zero_border = 0xc2cff1b4, /* minimal y such that e^y > 0 */ ] const desc64 : fltdesc(flt64, uint64, int64) = [ .explode = std.flt64explode, .assem = std.flt64assem, .tobits = std.flt64bits, .frombits = std.flt64frombits, .nan = 0x7ff8000000000000, .inf = 0x7ff0000000000000, .expmask = 0x7ff0000000000000, .precision = 53, .emax = 1023, .emin = -1022, .sgnmask = 1 << 63, .log_overkill = logoverkill64, .fma = fma64, .split_mul = hl_mult, .exp_zero_border = 0xc0874910d52d3052, /* minimal y such that e^y > 0 */ ] const split_mul32 = {x_h : flt32, x_l : flt32, y_h : flt32, y_l : flt32 var x : flt64 = (x_h : flt64) + (x_l : flt64) var y : flt64 = (y_h : flt64) + (y_l : flt64) var z = x * y var z_h : flt32 = (z : flt32) var z_l : flt32 = ((z - (z_h : flt64)) : flt32) -> (z_h, z_l) } const powr32 = {x : flt32, y : flt32 -> powrgen(x, y, desc32) } const powr64 = {x : flt64, y : flt64 -> powrgen(x, y, desc64) } generic powrgen = {x : @f, y : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i var xb, yb xb = d.tobits(x) yb = d.tobits(y) var xn : bool, xe : @i, xs : @u var yn : bool, ye : @i, ys : @u (xn, xe, xs) = d.explode(x) (yn, ye, ys) = d.explode(y) /* Special cases. Note we do not follow IEEE exceptions. */ if std.isnan(x) || std.isnan(y) /* Propagate NaN */ -> d.frombits(d.nan) elif (xb & ~d.sgnmask == 0) if (yb & ~d.sgnmask == 0) /* 0^0 is undefined. */ -> d.frombits(d.nan) elif yn /* 0^(< 0) is infinity */ -> d.frombits(d.inf) else /* otherwise, 0^y = 0. */ -> (0.0 : @f) ;; elif xn /* (< 0)^(anything) is undefined. This comes from thinking of floating-point numbers as representing small ranges of real numbers. If you really want to compute (-1.23)^5, use pown. */ -> d.frombits(d.nan) elif (xb & ~d.sgnmask == d.inf) if (yb & ~d.sgnmask == 0) /* oo^0 is undefined */ -> d.frombits(d.nan) elif yn /* +/-oo^(< 0) is +/-0 */ -> d.assem(xn, 0, 0) elif xn /* (-oo)^(anything) is undefined */ -> d.frombits(d.nan) else /* oo^(> 0) is oo */ -> d.frombits(d.inf) ;; elif std.eq(y, (1.0 : @f)) /* x^1 = x */ -> x elif yb & ~d.sgnmask == 0 /* (finite, positive)^0 = 1 */ -> (1.0 : @f) elif std.eq(x, (1.0 : @f)) if yb & ~d.sgnmask == d.inf /* 1^oo is undefined */ -> d.frombits(d.nan) else /* 1^(finite, positive) = 1 */ -> (1.0 : @f) ;; elif yb & ~d.sgnmask == d.inf if xe < 0 /* (0 < x < 1)^oo = 0 */ -> (0.0 : @f) else /* (x > 1)^oo = oo */ -> d.frombits(d.inf) ;; ;; /* Just do the dumb thing: compute exp( log(x) · y ). All the hard work goes into computing log(x) with high enough precision that our exp() implementation becomes the weakest link. The Table Maker's Dilemma says that quantifying "high enough" is a very difficult problem, but experimentally twice the precision of @f appears quite good enough. */ var ln_x_hi, ln_x_lo (ln_x_hi, ln_x_lo) = d.log_overkill(x) var final_exp_hi, final_exp_lo (final_exp_hi, final_exp_lo) = d.split_mul(ln_x_hi, ln_x_lo, y, 0.0) if d.tobits(final_exp_hi) & d.expmask == d.inf /* split_mul doesn't actually preserve the sign of infinity, so we can't trust final_exp_hi to get it. */ if (d.tobits(ln_x_hi) & d.sgnmask) == (yb & d.sgnmask) /* e^+Inf */ -> d.frombits(d.inf) else /* e^-Inf */ -> 0.0 ;; ;; if final_exp_hi < d.frombits(d.exp_zero_border) -> 0.0 ;; var z_hi = exp(final_exp_hi) if d.tobits(z_hi) & d.expmask == d.inf -> z_hi ;; -> d.fma(z_hi, final_exp_lo, z_hi) }