shithub: mc

ref: e9b59bee4efb648e323dfb6269209b59c575806f
dir: /lib/math/powr-impl.myr/

View raw version
use std

use "fpmath"
use "impls"
use "log-impl"
use "log-overkill"
use "util"

/*
   This is an implementation of powr, not pow, so the special cases
   are tailored more closely to the mathematical x^y = e^(y * log(x))
   than to historical C implementations (pow was aligned to the C99
   standard, which was aligned to codify existing practice).

   Even then, some parts of the powr specification are unclear. For
   example, IEEE 754-2008 does not specify what powr(infty, y) must
   return when y is not 0.0 (an erratum was planned in 2010, but
   does not appear to have been released as of 2018).
 */
pkg math =
	pkglocal const powr32 : (x : flt32, y : flt32 -> flt32)
	pkglocal const powr64 : (x : flt64, y : flt64 -> flt64)
;;

type fltdesc(@f, @u, @i) = struct
	explode : (f : @f -> (bool, @i, @u))
	assem : (n : bool, e : @i, s : @u -> @f)
	tobits : (f : @f -> @u)
	frombits : (u : @u -> @f)
	nan : @u
	inf : @u
	expmask : @u
	precision : @u
	emax : @i
	emin : @i
	sgnmask : @u
	log_overkill : (x : @f -> (@f, @f))
	fma : (x : @f, y : @f, z : @f -> @f)
	split_mul : (x_h : @f, x_l : @f, y_h : @f, y_l : @f -> (@f, @f))
	exp_zero_border : @u
;;

const desc32 : fltdesc(flt32, uint32, int32) =  [
	.explode = std.flt32explode,
	.assem = std.flt32assem,
	.tobits = std.flt32bits,
	.frombits = std.flt32frombits,
	.nan = 0x7fc00000,
	.inf = 0x7f800000,
	.expmask = 0x7f800000, /* mask to detect inf or NaN (inf, repeated for clarity) */
	.precision = 24,
	.emax = 127,
	.emin = -126,
	.sgnmask = 1 << 31,
	.log_overkill = logoverkill32,
	.fma = fma32,
	.split_mul = split_mul32,
	.exp_zero_border = 0xc2cff1b4, /* minimal y such that e^y > 0 */
]

const desc64 : fltdesc(flt64, uint64, int64) =  [
	.explode = std.flt64explode,
	.assem = std.flt64assem,
	.tobits = std.flt64bits,
	.frombits = std.flt64frombits,
	.nan = 0x7ff8000000000000,
	.inf = 0x7ff0000000000000,
	.expmask = 0x7ff0000000000000,
	.precision = 53,
	.emax = 1023,
	.emin = -1022,
	.sgnmask = 1 << 63,
	.log_overkill = logoverkill64,
	.fma = fma64,
	.split_mul = hl_mult,
	.exp_zero_border = 0xc0874910d52d3052, /* minimal y such that e^y > 0 */
]

const split_mul32 = {x_h : flt32, x_l : flt32, y_h : flt32, y_l : flt32
	var x : flt64 = (x_h : flt64) + (x_l : flt64)
	var y : flt64 = (y_h : flt64) + (y_l : flt64)
	var z = x * y
	var z_h : flt32 = (z : flt32)
	var z_l : flt32 = ((z - (z_h : flt64)) : flt32)
	-> (z_h, z_l)
}

const powr32 = {x : flt32, y : flt32
	-> powrgen(x, y, desc32)
}

const powr64 = {x : flt64, y : flt64
	-> powrgen(x, y, desc64)
}

generic powrgen = {x : @f, y : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i
	var xb, yb
	xb = d.tobits(x)
	yb = d.tobits(y)

	var xn : bool, xe : @i, xs : @u
	var yn : bool, ye : @i, ys : @u
	(xn, xe, xs) = d.explode(x)
	(yn, ye, ys) = d.explode(y)

	/*
	   Special cases. Note we do not follow IEEE exceptions.
	 */
	if std.isnan(x) || std.isnan(y)
		/* Propagate NaN */
		-> d.frombits(d.nan)
	elif (xb & ~d.sgnmask == 0)
		if (yb & ~d.sgnmask == 0)
			/* 0^0 is undefined. */
			-> d.frombits(d.nan)
		elif yn
			/* 0^(< 0) is infinity */
			-> d.frombits(d.inf)
		else
			/* otherwise, 0^y = 0. */
			-> (0.0 : @f)
		;;
	elif xn
		/*
		   (< 0)^(anything) is undefined. This comes from
		   thinking of floating-point numbers as representing
		   small ranges of real numbers. If you really want
		   to compute (-1.23)^5, use pown.
		 */
		-> d.frombits(d.nan)
	elif (xb & ~d.sgnmask == d.inf)
		if (yb & ~d.sgnmask == 0)
			/* oo^0 is undefined */
			-> d.frombits(d.nan)
		elif yn
			/* +/-oo^(< 0) is +/-0 */
			-> d.assem(xn, 0, 0)
		elif xn
			/* (-oo)^(anything) is undefined */
			-> d.frombits(d.nan)
		else
			/* oo^(> 0) is oo */
			-> d.frombits(d.inf)
		;;
	elif std.eq(y, (1.0 : @f))
		/* x^1 = x */
		-> x
	elif yb & ~d.sgnmask == 0
		/* (finite, positive)^0 = 1 */
		-> (1.0 : @f)
	elif std.eq(x, (1.0 : @f))
		if yb & ~d.sgnmask == d.inf
			/* 1^oo is undefined */
			-> d.frombits(d.nan)
		else
			/* 1^(finite, positive) = 1 */
			-> (1.0 : @f)
		;;
	elif yb & ~d.sgnmask == d.inf
		if xe < 0
			/* (0 < x < 1)^oo = 0 */
			-> (0.0 : @f)
		else
			/* (x > 1)^oo = oo */
			-> d.frombits(d.inf)
		;;
	;;

	/*
	   Just do the dumb thing: compute exp( log(x) · y ). All the hard work
	   goes into computing log(x) with high enough precision that our exp()
	   implementation becomes the weakest link. The Table Maker's Dilemma
	   says that quantifying "high enough" is a very difficult problem, but
	   experimentally twice the precision of @f appears quite good enough.
	 */
	var ln_x_hi, ln_x_lo
	(ln_x_hi, ln_x_lo) = d.log_overkill(x)

	var final_exp_hi, final_exp_lo
	(final_exp_hi, final_exp_lo) = d.split_mul(ln_x_hi, ln_x_lo, y, 0.0)

	if d.tobits(final_exp_hi) & d.expmask == d.inf
		/*
		   split_mul doesn't actually preserve the sign of infinity, so we can't
		   trust final_exp_hi to get it.
		 */
		if (d.tobits(ln_x_hi) & d.sgnmask) == (yb & d.sgnmask)
			/* e^+Inf */
			-> d.frombits(d.inf)
		else
			/* e^-Inf */
			-> 0.0
		;;
	;;

	if final_exp_hi < d.frombits(d.exp_zero_border)
		-> 0.0
	;;

	var z_hi = exp(final_exp_hi)
	if d.tobits(z_hi) & d.expmask == d.inf
		-> z_hi
	;;

	-> d.fma(z_hi, final_exp_lo, z_hi)
}