shithub: mc

ref: d5ff76398f8fb87284ea7b26121bf1262aaaae25
dir: /compile.myr/

View raw version
use std

use "types.use"

pkg regex =
	const compile	: (re : byte[:] -> std.error(regex#, status))
;;

type tree = union
	/* basic string building */
	`Alt	[tree#, tree#]
	`Cat	[tree#, tree#]

	/* repetition */
	`Star	tree#
	`Plus	tree#
	`Quest	tree#	

	/* end matches */
	`Byte	byte
	`Chr	char
	`Class	[char, char][:]
	`Dot

	/* meta */
	`Cap	tree#
	`Astart
	`Aend
;;

type parseresult = union
	`Some tree#
	`None
	`Fail status
;;

const compile = {pat
	var re : regex#

	re = std.zalloc()
	re.pat = pat
	match parse(re)
	`None:		std.put("Empty parse\n");;
	`Fail f:	-> `std.Failure f;;
	`Some t:
		dump(t, 0)
		gen(re, t)
		append(re, `Imatch)
		idump(re)
		-> `std.Success re
		;;
	;;
	-> `std.Failure (`Noimpl)
}

const gen = {re, t
	match t#
	`Alt	(a, b): genalt(re, a, b);;
	`Cat	(a, b): gen(re, a); gen(re, b);;
	/* repetition */
	`Star	a:	genstar(re, a);;
	`Plus	a:	gen(re, a); genstar(re, a);;
	`Quest	a:	genquest(re, a);;

	/* end matches */
	`Class	sl:
		std.die("Can't gen class\n")
		;;
	`Byte	b: 	append(re, `Ibyte b);;
	`Chr	c:	genchar(re, c);;
	`Dot: 		append(re, `Idot);;

	/* meta */
	`Cap	a:
		std.put("WARNING: Capture not implemented\n")
		gen(re, a)
		;;
	;;
	-> re.proglen
}

const genalt = {re, l, r
	var alt
	var jmp
	var l0
	var l1
	var l2

	alt 	= re.proglen
	l0	= append(re, `Ifork (-1, -1)) /* needs to be replaced */
		  gen(re, l)
	jmp	= re.proglen
	l1 	= append(re, `Ijmp -1) /* needs to be replaced */
	l2	= gen(re, r)

	re.prog[alt] = `Ifork(l0, l1)
	re.prog[jmp] = `Ijmp l2
	-> re.proglen
}

const genstar = {re, rep
	var alt
	var jmp
	var l0
	var l1
	var l2

	l0 	= re.proglen
	alt	= re.proglen
	l1 	= append(re, `Ifork (-1, -1)) /* needs to be replaced */
	jmp	= gen(re, rep)
	l2	= append(re, `Ijmp -1)


	re.prog[alt] = `Ifork (l1, l2)
	re.prog[jmp] = `Ijmp l0
	-> re.proglen
}

const genquest = {re, q
	var alt
	var l0
	var l1

	alt	= re.proglen
	l0	= append(re, `Ifork (-1, -1)) /* needs to be replaced */
	l1	= gen(re, q)
	re.prog[alt] = `Ifork (l0, l1)
	-> re.proglen
}

const genchar = {re, c
	var b : byte[4]
	var n
	var i

	n = std.encode(b[:], c)
	for i = 0; i < n; i++
		append(re, `Ibyte b[i])
	;;
	-> re.proglen
}

const append = {re, insn
	if re.proglen == re.prog.len
		re.prog = std.slgrow(re.prog, std.max(1, 2*re.proglen))
	;;
	re.prog[re.proglen] = insn
	re.proglen++
	-> re.proglen
}

const idump = {re
	var i

	for i = 0; i < re.proglen; i++
		std.put("%i:\t", i)
		match re.prog[i]
		/* Char matching. Consume exactly one byte from the string. */
		`Ibyte b: 
			std.put("`Ibyte %b (%c)\n", b, b castto(char))
			;;
		`Irange (start, end):
			std.put("`Irange (%b,%b)\n", start, end)
			;;
		`Idot:
			std.put("`Idot\n")
			;;
		/*
		Control flow. All of these recursively call step() until
		exactly one byte is consumed from the string.
		*/
		`Ifork	(lip, rip):
			std.put("`Ifork (%z,%z)\n", lip, rip)
			;;
		`Ijmp ip:
			std.put("`Ijmp %z\n", ip)
			;;
		`Imatch:
			std.put("`Imatch\n")
			;;
		;;
	;;
}

const dump = {t, indent
	var i

	for i = 0; i < indent; i++
		std.put(" ")
	;;
	match t#
	`Alt	(a, b):
		std.put("Alt\n")
		dump(a, indent + 1)
		dump(b, indent + 1)
		;;
	`Cat	(a, b):
		std.put("Cat\n")
		dump(a, indent + 1)
		dump(b, indent + 1)
		;;
	/* repetition */
	`Star	a:
		std.put("Star\n")
		dump(a, indent + 1)
		;;
	`Plus	a:
		std.put("Plus\n")
		dump(a, indent + 1)
		;;
	`Quest	a:
		std.put("Quest\n")
		dump(a, indent + 1)
		;;
	/* end matches */
	`Class	sl:
		std.put("Class [a..b]\n")
		;;
	`Byte	b:
		std.put("Byte %b\n", b)
		;;
	`Chr	c:
		std.put("Char %c\n", c)
		;;
	`Dot:
		std.put("Dot\n")
		;;

	/* meta */
	`Cap	a:
		std.put("Cap\n")
		dump(a, indent + 1)
		;;
	;;
}

const parse = {re
	match altexpr(re)
	`Some t:
		if re.pat.len == 0
			-> `Some t
		else
			free(t)
			-> `Fail (`Earlystop)
		;;
		;;
	`None:
		-> `None
		;;
	;;
}

const altexpr = {re
	var ret : tree#

	match catexpr(re)
	`Some t:
		ret = t
		if matchc(re, '|')
			match altexpr(re)
			`Some rhs:
				ret = mk(`Alt (ret, rhs))
				;;
			`None:
				free(ret)
				-> `Fail (`Earlystop)
				;;
			`Fail f:
				-> `Fail f
				;;
			;;
		;;
		;;
	other:	-> other;;
	;;
	std.put("<- alt\n")
	-> `Some ret
}

const catexpr = {re
	var ret

	std.put("-> cat\n")
	match repexpr(re)
	`Some t: 
		ret = t
		match catexpr(re)
		`Some rhs:
			ret = mk(`Cat (t, rhs))
			;;
		`Fail f: -> `Fail f;;
		`None:	/* nothing */;;
		;;
		;;
	other:	-> other;;
	;;
	std.put("<- cat\n")
	-> `Some ret
}

const repexpr = {re
	var ret

	std.put("-> rep\n")
	match baseexpr(re)
	`Some t:
		if matchc(re, '*')
			ret = mk(`Star t)
		elif matchc(re, '+')
			ret = mk(`Plus t)
		elif matchc(re, '?')
			ret = mk(`Quest t)
		else
			ret = t
		;;
		;;
	other:	-> other;;
	;;
	std.put("<- rep")
	-> `Some ret
}

const baseexpr = {re
	var ret

	std.put("-> base\n")
	if re.pat.len == 0
		-> `None
	;;
	match peekc(re)
	/* lower prec operators */
	'|':	-> `None;;
	')':	-> `None;;
	'*':	-> `Fail (`Badrep);;
	'+':	-> `Fail (`Badrep);;
	'?':	-> `Fail (`Badrep);;
	'[':	ret = chrclass(re);;
	'.':	getc(re); ret = mk(`Dot);;
	'^':	getc(re); ret = mk(`Astart);;
	'$':	getc(re); ret = mk(`Aend);;
	'(':	
		getc(re)
		match altexpr(re)
		`Some s:	ret = mk(`Cap s);;
		`None:	-> `Fail (`Emptyparen);;
		;;
		if !matchc(re, ')')
			std.put("Can't find match: got %c\n", getc(re))
			free(ret)
			-> `Fail (`Unbalanced)
		;;
		;;
	c:
		getc(re)
		if c == '\\'
			if re.pat.len == 0
				-> `Fail (`Earlystop)
			;;
			c = getc(re)
		;;
		ret = mk(`Chr c)
		;;
	;;
	std.put("<- base\n")
	dump(ret, 0)
	-> `Some ret
}

const chrclass = {pat
	var t

	t = std.alloc()
	t# = `Class [][:]
	-> t
}

const matchc = {re, c
	var str
	var chr

	(chr, str) = std.striter(re.pat)
	if chr != c
		-> false
	;;
	re.pat = str
	-> true
}

const getc = {re
	var c

	(c, re.pat) = std.striter(re.pat)
	-> c
}

const peekc = {re
	var c
	var _

	(c, _) = std.striter(re.pat)
	-> c
}

const mk = {v
	var t

	t = std.alloc()
	t# = v
	-> t
}

const free = {t
	match t#
	`Alt	(a, b): free(a); free(b);;
	`Cat	(a, b): free(a); free(b);;
	/* repetition */
	`Star	a:	free(a);;
	`Plus	a:	free(a);;
	`Quest	a:	free(a);;

	/* end matches */
	`Class	sl:	std.slfree(sl);;
	`Byte	b:	;;
	`Chr	c:	;;
	`Dot:		;;

	/* meta */
	`Cap	a:	free(a);;
	;;
	std.free(t)
}