shithub: mc

Download patch

ref: 3c881e8b133876fc8cff92106616bb93166bf113
parent: a33e94a67326d7774e7373b83597d84ee1eb347b
parent: 65c3b339b547b49d1c1d11c56d86ca81f756e9be
author: S. Gilles <[email protected]>
date: Wed Mar 21 18:49:08 EDT 2018

Merge remote-tracking branch 'ori/master' into libmath

--- a/bench/bigfactorial.myr
+++ /dev/null
@@ -1,28 +1,0 @@
-use std
-use testr
-
-const main = {
-	testr.bench([
-		[.name="bigfactorial-1", .fn={ctx; bigfact(1)}],
-		[.name="bigfactorial-100", .fn={ctx; bigfact(100)}],
-		[.name="bigfactorial-1000", .fn={ctx; bigfact(1000)}],
-		[.name="bigfactorial-10000", .fn={ctx; bigfact(10000)}],
-	][:])
-}
-
-const bigfact = {n
-	var i
-	var x, y
-
-	if n == 0
-		x = std.mkbigint(1)
-	else
-		x = std.mkbigint(n)
-		for i = n-1; i > 0; i--
-			y = std.mkbigint(i)
-			std.bigmul(x, y)
-			std.bigfree(y)
-		;;
-	;;
-	-> x
-}
--- /dev/null
+++ b/bench/bigint.myr
@@ -1,0 +1,209 @@
+use std
+use testr
+
+const main = {
+	var a, b	/* operands of the multiply benchmark, parsed once before any closure runs */
+	/* each bigfact() result is freed inside its closure; nothing else owns it */
+	a = std.get(std.bigparse(astr))
+	b = std.get(std.bigparse(bstr))
+	testr.bench([
+		[.name="bigfactorial-1",	.fn={ctx; std.bigfree(bigfact(1))}],
+		[.name="bigfactorial-100",	.fn={ctx; std.bigfree(bigfact(100))}],
+		[.name="bigfactorial-1000",	.fn={ctx; std.bigfree(bigfact(1000))}],
+		[.name="bigfactorial-10000",	.fn={ctx; std.bigfree(bigfact(10000))}],
+		[.name="bigmul-bothbig", 	.fn={ctx; bigmul(a, b)}],
+	][:])
+}
+
+/* n! as a freshly allocated bigint; the caller owns the result */
+const bigfact = {n
+	var k, acc, f
+
+	if n != 0
+		acc = std.mkbigint(n)
+		for k = n-1; k > 0; k--
+			f = std.mkbigint(k)
+			std.bigmul(acc, f)
+			std.bigfree(f)
+		;;
+	else
+		acc = std.mkbigint(1)
+	;;
+	-> acc
+}
+
+const bigmul = {a, b
+	var prod = std.bigdup(a)	/* std.bigmul multiplies in place, so work on a scratch copy of a */
+	std.bigmul(prod, b)
+	std.bigfree(prod)	/* only the multiplication matters here; the product is discarded */
+}
+
+const astr = \
+	"786226786586600439317972610307453185844816038396887410361680" \
+	"057562738205768169440311788359575359662484806302309290559269" \
+	"528840503089778892166969404943179665354542662022501031093355" \
+	"271292443882546985002869031472840087285045002214375586489512" \
+	"119373071089907222993450120378930997902435685145827038085819" \
+	"296865519485069915539726113453392326014373721767082018422958" \
+	"717062721702582361925462085790615747641302901942695904395406" \
+	"834224622529709782228408774414640957447279927460707815981786" \
+	"406994864430875491075424852796017575153912350736693997340109" \
+	"179809249816546470560796013608371676540452470243110540567406" \
+	"131454930387632550432649269771702882918734039217249418671214" \
+	"578343865544182288638782234366945485699781398626899358422106" \
+	"382221586857747438786541396870348009298608214885868257313676" \
+	"649092812753573767742580234440720920671581633553397695353934" \
+	"132303144338968914681227568810598784423402415568066389616154" \
+	"614818275979465719251274076724898041828550631986321592027149" \
+	"396140957537591558324156214045032930669195479259917491373556" \
+	"8106323004563791966979449"
+
+const bstr = \
+	"187308423231504228611111080270112029294766282678254231118742" \
+	"976869035200603975231371287575334271604770730635239117286259" \
+	"633494749566591369947211092881454729202366597231676526591300" \
+	"648610814270646993620412724160164594497114189417456618983125" \
+	"754243263403220712648959862959656559260934591165571538609438" \
+	"590305501075673585878506798890948928998460512317734318100422" \
+	"301601872241161203780242712104705885977145795396152336901291" \
+	"797346494631790697490596412053544611041844456774577364325024" \
+	"070127433788225479200261038961006069382091341032772858886595" \
+	"126748124988322385484529859500424755864562815273650872459428" \
+	"604927038475035756775266526245762679156608674463485106692013" \
+	"334198263770435008762560094658051091261225051459463607555576" \
+	"909910323885506799911997243762894976997772525420734032010908" \
+	"232750510323630849750298879815023574738441149575389970852774" \
+	"177147447530826963987253002135706371359716529615192991331710" \
+	"358753881164433191999293286713740248283177543479283727214184" \
+	"724131647548393947909056785714667606883184085430899204162328" \
+	"826158851207379006944693295498006311569980249630659828437857" \
+	"675974052589856026178343278810044863262932813277009490639496" \
+	"536609737088886342712009625141736821206684450062603392153153" \
+	"404209821474892409175922249782691829080890709213170466577080" \
+	"377570831150029278547755147996902849300234357700933105059734" \
+	"535663014087527221170181231192295192940913638374876317838628" \
+	"513685521076746695302116096246663697634145614610623764326966" \
+	"034284535477345974781679205896638416144421212858502427308326" \
+	"932579573129945442468876557939601952816014080189349604230705" \
+	"365868935617410975446640273108649254906769023875954001915851" \
+	"664244700370024600464468562174205614316632651533572360375076" \
+	"397825425411906188567274646160369108041601789830804365153321" \
+	"786129972802577217675779977803595583990850309499110057500386" \
+	"963299465049980241112611167139170503250450996990310096129565" \
+	"074487177158657223017144483992415686889386032110805070566222" \
+	"391021704709176968022469625318920962164345682090022548949639" \
+	"373535013410140815528983041981416489359560548762773247027894" \
+	"433410249929507117822644701257908554656601684665483333856314" \
+	"445354748390977894664864352404291796587312266255127874784721" \
+	"619161441066868319035508018794580703871852715612431050120428" \
+	"135204914805088588056364008960061473425114169699056353553362" \
+	"684434702224467541950013135029769757734688969972162892564754" \
+	"794607854348784493170718776237831436345037722624425771190246" \
+	"018034960769040875816438953729709133678842811906949629452358" \
+	"733021110486943979165015168688484556308945464367476269193790" \
+	"416269749963298838914536589681414703854517183672658308513799" \
+	"079924527379662830333236086766377066200832536550279672992236" \
+	"534386204300849885908109898105830852146495986415657102811369" \
+	"595247130333705192417745868596617533375480391948415664293960" \
+	"986757335528872844228230415255122202991202927184519763895621" \
+	"784166281785570962980369244162505438453718048719964848583896" \
+	"857048160613386597344498469472976896546659619163220731196501" \
+	"323014018615871231780371169751103683603599982508157269363828" \
+	"411288512876302246976722591644117294608452539733909759953439" \
+	"531429839711890887198366410539288744251028511505016177386244" \
+	"749167411006850944434211305749841593738801775687547388254201" \
+	"939297777489971469491544250094576226609276643868397292318619" \
+	"638481092861366771094145813562129333311647611902685299968882" \
+	"120832495897029738418349759779717411822241304202704141876186" \
+	"021609477309152514796122336409731627673520687854759447557901" \
+	"364641037659836873710690927438194862217026533565020830538006" \
+	"474368344215360348610383201842027979922422061958838533809305" \
+	"769494429697360443510783300535985639987094328411985571590736" \
+	"668585260182179277329957988272484981238401320570887133681587" \
+	"104377431152847065007690135501166962851804129900971535475096" \
+	"780466463541055999770138832441321686999264335797733246187289" \
+	"627201885855368556615429895387994196080244415898422345868310" \
+	"490709239989887413180596388426579419833690268988006284505869" \
+	"050445508634068307421234527095591085036542160387367691944010" \
+	"975408030387045757333117117106642362746817079296595077266231" \
+	"539797438547718217585348401525110815431508041372063435381246" \
+	"031899285980848878201413896797811316833560553995491439595681" \
+	"923196663368947274989655741723930512233824969005097885029422" \
+	"357501820683035694227690760029389720555091326855256135152515" \
+	"991629667196092383555620493513052376816166150463402645212712" \
+	"906520400777603115930223645552970514327317217278507643140717" \
+	"182239925721890342870110303375808665304693222547743946076755" \
+	"692015880794045769310816721195424546206360150602831845794754" \
+	"107728490276051145716712025134135902681643628483224712234443" \
+	"063152270612970654343583032461561909892954923011301907408278" \
+	"407275948095435658167567889516277464704601392558782479050096" \
+	"372379916668232181458496952539316561733622359194587739442387" \
+	"588748382930523021381163994808898807073006256512121958841602" \
+	"953834143318421508854501873168004451093624401020578718864158" \
+	"163753449732176772997318191567270326934788485419221293796231" \
+	"298858119919950554081790900335797631743704599354917110833385" \
+	"431529946758628104742509312229561143992532198765988099227018" \
+	"878514141238218580104368183434902069663104345510818435635700" \
+	"414562606631845260797394601892129923774496571513678144171246" \
+	"566200747779503333919046242725933701038577717836624878658828" \
+	"401949953074213765717982884761623247802534761098061468371860" \
+	"040646573304592111813415676017629612099762583556889291591502" \
+	"037421011749177580329597161049249542989001421868003098607452" \
+	"447624875123240558047810930160637311667163115623275956058331" \
+	"757826897673135754479768852033554041590179245281724637243354" \
+	"117485618855501973830027844010664951610244096398130144699577" \
+	"573289088165216285450511045687327534575629779122305699816415" \
+	"839649332244222046600972555138062951807105050363438375687919" \
+	"992990152747901390216324885281609322926002453655984740862104" \
+	"800118089506795694008885846789831227726569500145265086949437" \
+	"988620853249497033861484487843582927839639572167539512092182" \
+	"766072426433981477788791769424931914440282201493136920334470" \
+	"631849934882150702051061806916010852413506785949573428717190" \
+	"652565822614387096957090183695781683632118089627541885999806" \
+	"415805712006844406519039722769587152345216028824048408671591" \
+	"996748454264778680080651811924705017452522895119289297677596" \
+	"399980344578097690385847397582483006778980686294815929593822" \
+	"079423101338051192607368480059375406606683609868638996461314" \
+	"025218799102080893035335496954072017203857904998271542125754" \
+	"657205938132168124359996375519697778439582860828566963816699" \
+	"746486300437187004803313727795498535830803859623878348202161" \
+	"779205893343935527641213944242785604444240371520170466676321" \
+	"928607939131173535257360320584979666827307689152405952207350" \
+	"699524761489708601133045296730863484253942716135384938226198" \
+	"568386927589550770858283642852352203645776295407127244203589" \
+	"644784502503420416982059860877101382070973271189722892984326" \
+	"410970961430713847387443991297677367803419085937220978538980" \
+	"441628473685496662320890458789716737710737534633335780887058" \
+	"229693686098443784495418741977154423684238599047296839730434" \
+	"085083647717961771343882046771178192554297991147688856565555" \
+	"090757465961600177144271662429462033026661376496701392139827" \
+	"805716034927247277392566480664873339284583953871193119773255" \
+	"759149930467692321302473790340664010828943491220928942540701" \
+	"809650948952021861576751017761266006860909899638475036335626" \
+	"074637477900802957227409245252385131499626970250373571243577" \
+	"269099399542105493137222898746523529168177015284588584593530" \
+	"535422847893997078481093842801730870845975425823397648701679" \
+	"269953351511574749765536142563534666242970982516216461548676" \
+	"091848844824472900050116980087609760065613615848853809078135" \
+	"539024293905056113075795105768447478038151819261968647482323" \
+	"595339889631890319991305740056522600422147869065281727002061" \
+	"985563827492146406348739661318028756838105073748842194458561" \
+	"389348774902798361516170139490919233843823391129815389669442" \
+	"389886962769612654436729345525800917085158998450443138829807" \
+	"226401369403848923672976087487989192421758085312025736674941" \
+	"465891203009498155699006389389940020187366385171748881799189" \
+	"859057487614182907767561954322147267560355277722058958977274" \
+	"007000776522440864120260304346026144122668525606742093385842" \
+	"877851187985354298444881951517964028828337147992244078392289" \
+	"033891527938598539476575275618308528833647822827687596807355" \
+	"015006465320113872907922736279079949111368490254190424134142" \
+	"526305891930318351790395305770103209586168680236305757463917" \
+	"504215452060579791599906033628162787284365361089361880425156" \
+	"013152471178783132851787237084697582323149702243584748375544" \
+	"864216759785482917668710098191387194325613366050379525378498" \
+	"060821148303033711198142478670004863329544955847264359312410" \
+	"124051026217318607753467506730576567256247484381058493859200" \
+	"638791430791009825392526429942820641023745462165966092444179" \
+	"625962380866337824952291146979992952948121844170079781600828" \
+	"3296406611"
+
--- a/bench/bld.sub
+++ b/bench/bld.sub
@@ -23,8 +23,8 @@
 	lib ../lib/crypto:crypto
 	lib ../lib/testr:testr
 ;;
-bench bigfactorial =
-	bigfactorial.myr
+bench bigint =
+	bigint.myr
 	lib ../lib/std:std
 	lib ../lib/sys:sys
 	lib ../lib/testr:testr
--- /dev/null
+++ b/lib/crypto/ctbig.myr
@@ -1,0 +1,272 @@
+use std
+
+use "ct"
+
+pkg crypto =
+	type ctbig = struct
+		nbit	: std.size	/* declared width in bits; operands of one operation must agree */
+		dig	: uint32[:] 	/* little endian, zero padded to the digit count chosen for nbit */
+	;;
+	/* construction */
+	generic mkctbign 	: (v : @a, nbit : std.size -> ctbig#) :: numeric,integral @a
+	const mkctbigle	: (v : byte[:], nbit : std.size -> ctbig#)
+	//const mkctbigbe	: (v : byte[:], nbit : std.size -> ctbig#)
+	/* lifetime and conversion to and from std.bigint */
+	const ctfree	: (v : ctbig# -> void)
+	const ctbigdup	: (v : ctbig# -> ctbig#)
+	const ctlike	: (v : ctbig# -> ctbig#)
+	const ct2big	: (v : ctbig# -> std.bigint#)
+	const big2ct	: (v : std.bigint#, nbit : std.size -> ctbig#)
+	/* arithmetic: the result is written into r */
+	const ctadd	: (r : ctbig#, a : ctbig#, b : ctbig# -> void)
+	const ctsub	: (r : ctbig#, a : ctbig#, b : ctbig# -> void)
+	const ctmul	: (r : ctbig#, a : ctbig#, b : ctbig# -> void)
+	//const ctdivmod	: (r : ctbig#, m : ctbig#, a : ctbig#, b : ctbig# -> void)
+	//const ctmodpow	: (r : ctbig#, a : ctbig#, b : ctbig# -> void)
+	/* predicates */
+	const ctiszero	: (v : ctbig# -> bool)
+	const cteq	: (a : ctbig#, b : ctbig# -> bool)
+	const ctne	: (a : ctbig#, b : ctbig# -> bool)
+	const ctgt	: (a : ctbig#, b : ctbig# -> bool)
+	const ctge	: (a : ctbig#, b : ctbig# -> bool)
+	const ctlt	: (a : ctbig#, b : ctbig# -> bool)
+	const ctle	: (a : ctbig#, b : ctbig# -> bool)
+;;
+
+const Base = 0x100000000ul
+
+/* makes a ctbig of width nbit holding the integer v, which is first converted to uint64 */
+generic mkctbign = {v : @a, nbit : std.size :: integral,numeric @a
+	var a, val
+
+	a = std.zalloc()
+	val = (v : uint64)
+	a.nbit = nbit
+	/* slzalloc, not slalloc: only digits 0 and 1 are written below, the rest must read as zero */
+	a.dig = std.slzalloc(ndig(nbit))
+	if nbit > 0
+		a.dig[0] = (val : uint32)
+	;;
+	if nbit > 32
+		a.dig[1] = (val >> 32 : uint32)
+	;;
+	-> a
+}
+
+/* copies the digits of ct into a newly allocated std.bigint whose sign is set to 1 */
+/* NOTE(review): padding digits are copied as-is and a zero value still gets sign 1 -- confirm std.bigint accepts that */
+const ct2big = {ct
+	var dig = std.sldup(ct.dig)
+	-> std.mk([.sign=1, .dig=dig])
+}
+
+/* builds a ctbig of width nbit from the digits of ct (despite its name, ct is the std.bigint# input) */
+const big2ct = {ct, nbit
+	var v, n, l
+
+	n = ndig(nbit)
+	l = std.min(n, ct.dig.len)
+	v = std.slzalloc(n)
+	/* copy only the digits that fit, with both slices cut to the same length l */
+	std.slcp(v[:l], ct.dig[:l])
+	/* digits past l stay zero; the sign of ct is not consulted */
+	-> std.mk([.nbit=nbit, .dig=v])
+}
+
+const mkctbigle = {v, nbit
+	var a, last, i, o, off
+
+	/*
+	  Bytes are little endian. It's ok to depend on the length of v: we can
+	  leak the size of the numbers. Each byte is widened before it is shifted.
+	 */
+	o = 0
+	a = std.slzalloc(ndig(nbit))
+	for i = 0; i + 4 <= v.len; i += 4
+		a[o++] = \
+			((v[i + 0] : uint32) <<  0) | \
+			((v[i + 1] : uint32) <<  8) | \
+			((v[i + 2] : uint32) << 16) | \
+			((v[i + 3] : uint32) << 24)
+	;;
+	/* pack the 0-3 leftover bytes into one last digit; v[i], not v[off], is the byte at this position */
+	last = 0
+	for i; i < v.len; i++
+		off = i & 0x3
+		last |= (v[i] : uint32) << (8 *off)
+	;;
+	a[o++] = last
+	-> std.mk([.nbit=nbit, .dig=a])
+}
+
+/* allocates a zero-filled ctbig with the same nbit and digit count as v */
+const ctlike = {v
+	var dig = std.slzalloc(v.dig.len)
+
+	-> std.mk([.nbit=v.nbit, .dig=dig])
+}
+
+/* deep copy: the returned ctbig carries its own duplicate of v's digits */
+const ctbigdup = {v
+	var dig = std.sldup(v.dig)
+
+	-> std.mk([.nbit=v.nbit, .dig=dig])
+}
+
+const ctfree = {v
+	std.slfree(v.dig)	/* release the digits first: they are reached through the struct freed next */
+	std.free(v)	/* v must not be used after this call */
+}
+
+/* r = a + b over the shared digit count; a carry out of the top digit is dropped */
+const ctadd = {r, a, b
+	var v, i, carry
+
+	checksz(a, b)
+	checksz(a, r)
+	/* checksz() pinned every digit count to a.dig.len, so one bound serves all three */
+	carry = 0
+	for i = 0; i < a.dig.len; i++
+		v = (a.dig[i] : uint64) + (b.dig[i] : uint64) + carry;
+		r.dig[i] = (v  : uint32)
+		carry = v >> 32	/* was `carry >>= 32`, which never picked the overflow up from v */
+	;;
+}
+
+/* r = a - b, digit by digit with a running borrow; a borrow out of the top digit is dropped */
+const ctsub = {r, a, b
+	var brw, d, k
+
+	checksz(a, b)
+	checksz(a, r)
+	brw = 0
+	for k = 0; k < a.dig.len; k++
+		d = (a.dig[k] : uint64) - (b.dig[k] : uint64) - brw
+		brw = d >> 63	/* top bit set means the 64-bit difference wrapped below zero */
+		d = mux(brw, d + Base, d)
+		r.dig[k] = (d  : uint32)
+	;;
+}
+
+/* r = a * b, truncated to the operands' digit count; r may alias a or b */
+const ctmul = {r, a, b
+	var i, j, n
+	var ai, bj, wij
+	var carry, t, w
+
+	checksz(a, b)
+	checksz(a, r)
+	n = a.dig.len
+	/* schoolbook product accumulated in a scratch buffer so aliased operands stay readable */
+	w  = std.slzalloc(a.dig.len + b.dig.len)
+	for j = 0; j < b.dig.len; j++
+		carry = 0
+		for i = 0; i < a.dig.len; i++
+			ai = (a.dig[i]  : uint64)
+			bj = (b.dig[j]  : uint64)
+			wij = (w[i+j]  : uint64)
+			t = ai * bj + wij + carry
+			w[i+j] = (t  : uint32)
+			carry = t >> 32
+		;;
+		w[i + j] = (carry  : uint32)
+	;;
+	/* r's old digits are replaced whether or not r aliases an operand, so always release them */
+	std.slgrow(&w, n)
+	std.slfree(r.dig)
+	r.dig = w[:n]
+}
+
+
+//const ctmodpow = {res, a, b
+//	/* find rinv, mprime */
+//	
+//	/* convert to monty space */
+//
+//	/* do the modpow */
+//
+//	/* and come back */
+//}
+
+/* reports whether every digit of a is zero; the loop visits all digits regardless of their values */
+const ctiszero = {a
+	var acc, dz
+	acc = 1
+	for var k = 0; k < a.dig.len; k++
+		dz = mux(a.dig[k], 0, 1)	/* presumably 1 for a zero digit, 0 otherwise -- mux comes from "ct", confirm */
+		acc = mux(dz, acc, 0)
+	;;
+	-> (acc : bool)
+}
+
+/* reports whether a and b hold the same digits; every digit is visited */
+const cteq = {a, b
+	var z, d, e
+
+	checksz(a, b)
+	e = 1
+	for var i = 0; i < a.dig.len; i++
+		z = a.dig[i] - b.dig[i]
+		d = mux(z, 0, 1)	/* 1 when the digits match, as in ctiszero; the original mux(z, 1, 0) flagged mismatches */
+		e = mux(e, d, 0)
+	;;
+	-> (e : bool)
+}
+
+/* negation of cteq(); the bool is converted to a byte so not() can be applied to it */
+const ctne = {a, b
+	var same = (cteq(a, b) : byte)
+
+	-> (not(same) : bool)
+}
+
+/* a > b: digits are scanned from index 0 upward, so a later (more significant) differing digit overrides earlier ones */
+const ctgt = {a, b
+	var same, hi, res
+
+	checksz(a, b)
+	res = 0
+	for var k = 0; k < a.dig.len; k++
+		same = not(a.dig[k] - b.dig[k])	/* presumably set only for equal digits -- not() comes from "ct", confirm */
+		hi = gt(a.dig[k], b.dig[k])
+		res = mux(same, res, hi)
+	;;
+	-> (res : bool)
+}
+
+/* a >= b, computed as the negation of ctlt() */
+const ctge = {a, b
+	var less = (ctlt(a, b) : byte)
+
+	-> (not(less) : bool)
+}
+
+/* a < b, mirroring ctgt: digits are scanned from index 0 upward so a later differing digit overrides earlier ones */
+const ctlt = {a, b
+	var e, d, l
+
+	checksz(a, b)
+	l = 0
+	for var i = 0; i < a.dig.len; i++
+		e = not(a.dig[i] - b.dig[i])
+		d = gt(b.dig[i], a.dig[i])	/* operands swapped: the original repeated ctgt's gt(a, b) and so computed a > b */
+		l = mux(e, l, d)
+	;;
+	-> (l : bool)
+}
+
+/* a <= b, computed as the negation of ctgt() */
+const ctle = {a, b
+	var more = (ctgt(a, b) : byte)
+
+	-> (not(more) : bool)
+}
+
+const ndig = {nbit
+	-> (nbit + 8*sizeof(uint32) - 1)/sizeof(uint32)	/* digit count allocated for an nbit-wide value. NOTE(review): the divisor is bytes per digit, not bits per digit, so this looks larger than needed -- confirm, and check mkctbigle's trailing store before tightening */
+}
+
+const checksz = {a, b
+	std.assert(a.nbit == b.nbit, "mismatched bit sizes")	/* the declared widths of both operands must agree */
+	std.assert(a.dig.len == b.dig.len, "mismatched backing sizes")	/* and so must the digit counts that callers index in lockstep */
+}
--- a/lib/regex/interp.myr
+++ b/lib/regex/interp.myr
@@ -130,7 +130,6 @@
 	re.expired = Zthr
 	re.free = Zthr
 	re.nfree = 0
-	re.nexttid = 0
 	re.nthr = 0
 }
 
@@ -184,6 +183,7 @@
 
 	re.str = str
 	re.strp = 0
+	re.nexttid = 0
 
 	bestmatch = Zthr
 	states = std.mkbs()
--- a/lib/std/bigint.myr
+++ b/lib/std/bigint.myr
@@ -92,6 +92,7 @@
 extern const put : (fmt : byte[:], args : ... -> size)
 
 const Base = 0x100000000ul
+const Kmin = 64
 
 generic mkbigint = {v : @a :: integral,numeric @a
 	var a
@@ -124,7 +125,7 @@
 	;;
 
 	for i = 0; i + 4 <= v.len; i += 4
-		std.slpush(&a.dig, \
+		slpush(&a.dig, \
 			(v[i + 0] <<  0 : uint32) | \
 			(v[i + 1] <<  8 : uint32) | \
 			(v[i + 2] << 16 : uint32) | \
@@ -135,7 +136,7 @@
 		off = i & 0x3
 		last |= (v[off] : uint32) << (8 *off)
 	;;
-	std.slpush(&a.dig, last)
+	slpush(&a.dig, last)
 	-> trim(a)
 }
 
@@ -170,7 +171,7 @@
 }
 
 const bigclear = {v
-	std.slfree(v.dig)
+	slfree(v.dig)
 	v.sign = 0
 	v.dig = [][:]
 	-> v
@@ -259,7 +260,7 @@
 	 fit in one digit.
 	 */
 	v = mkbigint(1)
-	for c : std.bychar(str)
+	for c : bychar(str)
 		if c == '_'
 			continue
 		;;
@@ -452,7 +453,7 @@
 	for i = 0; i < n; i++
 		v = (a.dig[i] : uint64) + carry;
 		if i < b.dig.len
-			v += ((b.dig[i]  : uint64))
+			v += (b.dig[i]  : uint64)
 		;;
 
 		if v >= Base
@@ -515,10 +516,7 @@
 
 /* a *= b */
 const bigmul = {a, b
-	var i, j
-	var ai, bj, wij
-	var carry, t
-	var w
+	var s
 
 	if a.sign == 0 || b.sign == 0
 		a.sign = 0
@@ -526,11 +524,84 @@
 		a.dig = [][:]
 		-> a
 	elif a.sign != b.sign
-		a.sign = -1
+		s = -1
 	else
-		a.sign = 1
+		s = 1
 	;;
 
+	umul(a, b)
+
+	a.sign = s
+	-> trim(a)
+}
+
+/* a = a*b for bigmul, which assigns the sign itself afterwards: kmul once both operands have Kmin digits, smallmul otherwise */
+const umul = {a, b
+	var prod
+	if a.dig.len >= Kmin && b.dig.len >= Kmin
+		prod = mkbigint(0)
+		kmul(prod, a, b)
+		bigmove(a, prod)
+	else
+		smallmul(a, b)
+	;;
+}
+
+/* r = a*b on the operands' digits via one Karatsuba split; the sub-products recurse through umul/bigmul */
+const kmul = {r, a, b
+	var x0, x1, y0, y1, n, z0, z1, z2, t0
+	/* keep the longer operand in a */
+	if a.dig.len < b.dig.len
+		t0 = a
+		a = b
+		b = t0
+	;;
+	n = min(a.dig.len / 2, b.dig.len - 1)
+	/* the halves are sign=1 views that borrow the operands' digits: they are never freed here */
+	x0 = [.sign=1, .dig=a.dig[:n]]
+	x1 = [.sign=1, .dig=a.dig[n:]]
+	y0 = [.sign=1, .dig=b.dig[:n]]
+	y1 = [.sign=1, .dig=b.dig[n:]]
+	/* z0 = x0*y0 */
+	z0 = bigdup(&x0)
+	trim(z0)
+	umul(z0, &y0)
+	/* z2 = x1*y1 */
+	z2 = bigdup(&x1)
+	trim(z2)
+	umul(z2, &y1)
+
+	/* z1 = (x0 - x1)*(y1 - y0) + z0 + z2, which equals x0*y1 + x1*y0 */
+	z1 = bigdup(&x0)
+	trim(z1)
+	bigsub(z1, &x1)
+	t0 = bigdup(&y1)
+	bigsub(t0, &y0)
+	/* either difference may be negative: bigmul combines the signs, umul leaves that to its caller */
+	bigmul(z1, t0)
+	bigadd(z1, z0)
+	bigadd(z1, z2)
+	/* r = z0 + z1*2^(32n) + z2*2^(64n) */
+	bigshli(z1, 32*n)
+	bigshli(z2, 32*2*n)
+
+	bigclear(r)
+	bigadd(r, z0)
+	bigadd(r, z1)
+	bigadd(r, z2)
+
+	bigfree(z0)
+	bigfree(z1)
+	bigfree(z2)
+	bigfree(t0)
+}
+
+const smallmul = {a, b
+	var i, j
+	var ai, bj, wij
+	var carry, t
+	var w
+
 	w  = slzalloc(a.dig.len + b.dig.len)
 	for j = 0; j < b.dig.len; j++
 		carry = 0
@@ -546,7 +617,6 @@
 	;;
 	slfree(a.dig)
 	a.dig = w
-	-> trim(a)
 }
 
 const bigdiv = {a : bigint#, b : bigint# -> bigint#
--- a/lib/std/cmp.myr
+++ b/lib/std/cmp.myr
@@ -10,7 +10,7 @@
 		`After
 	;;
 
-	generic numcmp		: (a : @a, b : @a -> order)
+	generic numcmp		: (a : @a, b : @a -> order) :: numeric @a
 	const strcmp		: (a : byte[:], b : byte[:] -> order)
 	const strncmp		: (a : byte[:], b : byte[:], n : size -> order)
 	const strcasecmp	: (a : byte[:], b : byte[:] -> order)
--- a/lib/std/fmt.myr
+++ b/lib/std/fmt.myr
@@ -539,7 +539,7 @@
 const intfmt = {sb, opts, signed, bits : uint64, nbits
 	var isneg
 	var sval, val
-	var b : byte[32]
+	var b : byte[64]
 	var i, j, npad
 	var base
 
--- a/mbld/bld.sub
+++ b/mbld/bld.sub
@@ -1,5 +1,6 @@
 bin mbld =
 	build.myr
+	cpufeatures+x64.s
 	deps.myr
 	libs.myr
 	install.myr
--- /dev/null
+++ b/mbld/cpufeatures+x64.s
@@ -1,0 +1,10 @@
+.globl bld$cpufeatures
+.globl bld$_cpufeatures	/* NOTE(review): confirm this is the spelling the OSX toolchain expects for the underscore-prefixed alias */
+bld$cpufeatures:	/* returns the CPUID leaf 1 feature words packed into %rax; takes no arguments */
+bld$_cpufeatures:
+	mov	$0x1, %eax	/* select CPUID leaf 1 */
+	cpuid	/* feature bits are read from %ecx and %edx below; cpuid also writes %ebx -- NOTE(review): confirm callers do not need %rbx preserved */
+	mov	%ecx, %eax	/* rax = ECX; the 32-bit move clears the upper half */
+	rol	$32, %rax	/* rotate ECX into the upper half */
+	shrd	$32, %rdx, %rax	/* shift it back down while pulling EDX in above it: rax = EDX:ECX */
+	ret
--- a/mbld/main.myr
+++ b/mbld/main.myr
@@ -29,6 +29,7 @@
 		.opts = [
 			[.opt='j', .arg="jobs", .desc="build with at most 'jobs' jobs"],
 			[.opt='t', .arg="tag", .desc="build with specified systag"],
+			[.opt='T', .arg="tag", .desc="build with only the specified systag"],
 			[.opt='S', .desc="generate assembly when building"],
 			[.opt='I', .arg="inc", .desc="add 'inc' to your include path"],
 			[.opt='R', .arg="runsrc", .desc="source to compile and run"],
@@ -51,6 +52,7 @@
 		| ('I', arg):	std.slpush(&bld.opt_incpaths, arg)
 		| ('B', arg):	bld.opt_instbase = arg
 		| ('t', tag):	std.slpush(&tags, tag)
+		| ('T', tag):	std.slpush(&bld.opt_alltags, tag)
 		| ('j', arg):	bld.opt_maxproc = std.getv(std.intparse(arg), 1)
 		| ('R', arg):	runsrc = arg
 		| ('o', arg):	bld.opt_objdir = arg
--- a/mbld/opts.myr
+++ b/mbld/opts.myr
@@ -7,9 +7,11 @@
 	var opt_arch 		: byte[:]
 	var opt_sys		: byte[:]
 	var opt_sysvers		: (int, int, int)
+	var opt_cpufeatures	: uint64
 	var opt_runtime		: byte[:]
 	var opt_genasm		: bool
 	var opt_incpaths	: byte[:][:]
+	var opt_alltags		: byte[:][:]
 	var opt_mcflags		: byte[:][:]
 	var opt_museflags	: byte[:][:]
 	var opt_ldflags		: byte[:][:]
@@ -30,14 +32,19 @@
 
 	const initopts	: (-> void)
 	const parseversion	: (v : byte[:] -> (int, int, int))
+
+	/* not exactly portable, but good enough for now */
+	const CpuidSSE4 : uint64= 0x180000
+	extern const cpufeatures : (-> uint64)
 ;;
 
 var opt_arch 		= ""
 var opt_sys		= ""
 var opt_binname		= ""
+var opt_cpufeatures	= 0ul
 var opt_libname		= ""
 var opt_runtime		= ""
-var opt_incpaths	/* FIXME: taking a constant slice is a nonconstant initializer */
+var opt_incpaths	= [][:]
 var opt_instbase	= ""
 var opt_destdir		= ""
 var opt_sysvers
@@ -48,6 +55,7 @@
 var opt_mcflags		= [][:]
 var opt_museflags	= [][:]
 var opt_ldflags		= [][:]
+var opt_alltags		= [][:]
 var opt_objdir		= "obj"
 var opt_genasm  	= false
 var opt_silent		= false
@@ -75,8 +83,10 @@
 	| unknown:	std.fatal("unknown architecture \"{}\"\n", unknown)
 	;;
 
+	/* from cpuid with EAX=1; EDX at top, ECX at bottom */
+	opt_cpufeatures = cpufeatures()
+
 	opt_maxproc = 2*(thread.ncpu() : std.size)
-	opt_incpaths = [][:]
 	opt_instbase = config.Instroot
 	opt_destdir = std.getenvv("DESTDIR", "")
 	opt_mc = std.getenvv("MYR_MC", "6m")
--- a/mbld/syssel.myr
+++ b/mbld/syssel.myr
@@ -142,27 +142,38 @@
 }
 
 const addsysattrs = {b, tags
-	std.htput(b.tags, opt_sys, opt_sysvers)
+	if opt_alltags.len > 0
+		for t : opt_alltags
+			tag(b, t)
+		;;
+	else
+		std.htput(b.tags, opt_sys, opt_sysvers)
 
-	match opt_sys
-	| "freebsd":	tag(b, "posixy")
-	| "netbsd":	tag(b, "posixy")
-	| "openbsd":	tag(b, "posixy")
-	| "osx":	tag(b, "posixy")
-	| "linux":	tag(b, "posixy")
-	| "plan9":
-	| unknown:	std.fatal("unknown system \"{}\"\n", unknown)
-	;;
+		match opt_sys
+		| "freebsd":	tag(b, "posixy")
+		| "netbsd":	tag(b, "posixy")
+		| "openbsd":	tag(b, "posixy")
+		| "osx":	tag(b, "posixy")
+		| "linux":	tag(b, "posixy")
+		| "plan9":
+		| unknown:	std.fatal("unknown system \"{}\"\n", unknown)
+		;;
 
-	match opt_arch
-	| "x64":	tag(b, "x64")
-	| unknown:	std.fatal("unknown architecture {}\n", unknown)
-	;;
-	for t : tags
-		tag(b, t)
-	;;
+		match opt_arch
+		| "x64":	
+			tag(b, "x64")
+			if opt_cpufeatures & CpuidSSE4 == CpuidSSE4
+				tag(b, "sse4")
+			;;
+		| unknown:
+			std.fatal("unknown architecture {}\n", unknown)
+		;;
 
-	loadtagfile(b, "bld.tag")
+		for t : tags
+			tag(b, t)
+		;;
+		loadtagfile(b, "bld.tag")
+	;;
 }
 
 const loadtagfile = {b, tagfile
--- a/mk/bootstrap/bootstrap+Darwin-x86_64.sh
+++ b/mk/bootstrap/bootstrap+Darwin-x86_64.sh
@@ -4,6 +4,7 @@
 pwd=`pwd`
 set -x
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+	as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
 	as -g -o lib/thread/start.o lib/thread/start+osx-x64.s
 	as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
 	as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -140,5 +141,5 @@
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
-	ld -pagezero_size 0x100000000 -macosx_version_min 10.6 -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys -macosx_version_min 10.6
+	ld -pagezero_size 0x100000000 -macosx_version_min 10.6 -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys -macosx_version_min 10.6
 true
--- a/mk/bootstrap/bootstrap+FreeBSD-amd64.sh
+++ b/mk/bootstrap/bootstrap+FreeBSD-amd64.sh
@@ -4,6 +4,7 @@
 pwd=`pwd`
 set -x
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+	as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
 	as -g -o lib/thread/exit.o lib/thread/exit+freebsd-x64.s
 	as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
 	as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -141,5 +142,5 @@
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
-	ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+	ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
 true
--- a/mk/bootstrap/bootstrap+Linux-x86_64.sh
+++ b/mk/bootstrap/bootstrap+Linux-x86_64.sh
@@ -4,6 +4,7 @@
 pwd=`pwd`
 set -x
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+	as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
 	as -g -o lib/thread/exit.o lib/thread/exit+linux-x64.s
 	as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
 	as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -141,5 +142,5 @@
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
-	ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+	ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
 true
--- a/mk/bootstrap/bootstrap+NetBSD-amd64.sh
+++ b/mk/bootstrap/bootstrap+NetBSD-amd64.sh
@@ -4,6 +4,7 @@
 pwd=`pwd`
 set -x
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+	as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
 	as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
 	as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
 	$pwd/6/6m -I . -I lib/sys lib/std/option.myr
@@ -139,5 +140,5 @@
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
-	ld -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+	ld -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
 true
--- a/mk/bootstrap/bootstrap+OpenBSD-amd64.sh
+++ b/mk/bootstrap/bootstrap+OpenBSD-amd64.sh
@@ -4,6 +4,7 @@
 pwd=`pwd`
 set -x
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+	as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
 	as -g -o lib/thread/exit.o lib/thread/exit+openbsd-x64.s
 	as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
 	as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -141,5 +142,5 @@
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
-	ld -nopie --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+	ld -nopie --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
 true
--- a/mk/bootstrap/bootstrap+Plan9-amd64.sh
+++ b/mk/bootstrap/bootstrap+Plan9-amd64.sh
@@ -4,6 +4,7 @@
 pwd=`pwd`
 set -x
 	$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config+plan9-x64.myr
+	6a -o mbld/cpufeatures.6 mbld/cpufeatures+x64.s
 	6a -o lib/thread/atomic-impl.6 lib/thread/atomic-impl+plan9-x64.s
 	6a -o lib/std/getbp.6 lib/std/getbp+plan9-x64.s
 	$pwd/6/6.out -I lib/sys lib/std/errno+plan9.myr
@@ -138,5 +139,5 @@
 	$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
 	$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
 	$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
-	6l -l -o mbld/mbld $pwd/rt/_myrrt.6 mbld/deps.6 mbld/main.6 mbld/util.6 mbld/libs.6 mbld/syssel.6 mbld/config.6 mbld/opts.6 mbld/subtest.6 mbld/types.6 mbld/test.6 mbld/install.6 mbld/parse.6 mbld/build.6 lib/thread/libthread.a lib/bio/libbio.a lib/regex/libregex.a lib/std/libstd.a lib/sys/libsys.a
+	6l -l -o mbld/mbld $pwd/rt/_myrrt.6 mbld/deps.6 mbld/main.6 mbld/util.6 mbld/cpufeatures.6 mbld/libs.6 mbld/syssel.6 mbld/config.6 mbld/opts.6 mbld/subtest.6 mbld/types.6 mbld/test.6 mbld/install.6 mbld/parse.6 mbld/build.6 lib/thread/libthread.a lib/bio/libbio.a lib/regex/libregex.a lib/std/libstd.a lib/sys/libsys.a
 true