shithub: mc

Download patch

ref: 2a91805ad7f1a901eae822c58f40e74558bf8ec0
parent: a2da7312d03152baea7311d4e0263f42fbc9a2e6
author: Ori Bernstein <[email protected]>
date: Tue Oct 22 09:25:32 EDT 2013

Add parsing for regex.

--- a/compile.myr
+++ b/compile.myr
@@ -3,20 +3,280 @@
 use "types.use"
 
 pkg regex =
-	const compile	: (re : byte[:] -> regex#)
+	const compile	: (re : byte[:] -> std.error(regex#, status))
 ;;
 
-const compile = {re
-	var re
+type tree = union
+	/* basic string building */
+	`Alt	[tree#, tree#]
+	`Cat	[tree#, tree#]
+
+	/* repetition */
+	`Star	tree#
+	`Plus	tree#
+	`Quest	tree#	
+
+	/* end matches */
+	`Byte	byte
+	`Chr	char
+	`Class	[char, char][:]
+	`Dot
+
+	/* meta */
+	`Cap	tree#
+	`Astart
+	`Aend
+;;
+
+type parseresult = union
+	`Some tree#
+	`None
+	`Fail status
+;;
+
+const compile = {pat
+	var re : regex#
+
 	re = std.zalloc()
-	re.prog = std.slalloc(4)
-	/* compiled regex for a* */
-	re.prog[0] = `Byte ('a' castto(byte))
-	re.prog[1] = `Byte ('a' castto(byte))
-	re.prog[2] = `Split (0, 3)
-	re.prog[3] = `Match
+	re.pat = pat
+	match parse(re)
+	`Some t:	dump(t, 0);;
+	`None:		std.put("Empty parse\n");;
+	`Fail f:	-> `std.Failure f;;
+	;;
+	-> `std.Failure (`Noimpl)
+}
 
-	-> re
+const dump = {t, indent
+	var i
+
+	for i = 0; i < indent; i++
+		std.put(" ")
+	;;
+	match t#
+	`Alt	(a, b):
+		std.put("Alt\n")
+		dump(a, indent + 1)
+		dump(b, indent + 1)
+		;;
+	`Cat	(a, b):
+		std.put("Cat\n")
+		dump(a, indent + 1)
+		dump(b, indent + 1)
+		;;
+	/* repetition */
+	`Star	a:
+		std.put("Star\n")
+		dump(a, indent + 1)
+		;;
+	`Plus	a:
+		std.put("Plus\n")
+		dump(a, indent + 1)
+		;;
+	`Quest	a:
+		std.put("Quest\n")
+		dump(a, indent + 1)
+		;;
+	/* end matches */
+	`Class	sl:
+		std.put("Class [a..b]\n")
+		;;
+	`Byte	b:
+		std.put("Byte %b\n", b)
+		;;
+	`Chr	c:
+		std.put("Char %c\n", c)
+		;;
+	`Dot:
+		std.put("Dot\n")
+		;;
+
+	/* meta */
+	`Cap	a:
+		std.put("Cap\n")
+		dump(a, indent + 1)
+		;;
+	;;
 }
 
+const parse = {re
+	match altexpr(re)
+	`Some t:
+		if re.pat.len == 0
+			-> `Some t
+		else
+			free(t)
+			-> `Fail (`Earlystop)
+		;;
+		;;
+	`None:
+		-> `None
+		;;
+	;;
+}
 
+const altexpr = {re
+	var ret : tree#
+
+	std.put("-> alt\n")
+	match catexpr(re)
+	`Some t:
+		ret = t
+		if matchc(re, '|')
+			match altexpr(re)
+			`Some rhs:
+				ret = mk(`Alt (ret, rhs))
+				;;
+			`None:
+				free(ret)
+				-> `Fail (`Earlystop)
+				;;
+			`Fail f:
+				-> `Fail f
+				;;
+			;;
+
+		;;
+		;;
+	other:	-> other;;
+	;;
+	-> `Some ret
+}
+
+const catexpr = {re
+	var ret
+
+	std.put("-> cat\n")
+	match repexpr(re)
+	`Some t: 
+		ret = t
+		match catexpr(re)
+		`Some rhs:
+			ret = mk(`Cat (t, rhs))
+			;;
+		`Fail f: -> `Fail f;;
+		`None:	/* nothing */;;
+		;;
+		;;
+	other:	-> other;;
+	;;
+	-> `Some ret
+}
+
+const repexpr = {re
+	var ret
+
+	std.put("-> rep\n")
+	match baseexpr(re)
+	`Some t:
+		if matchc(re, '*')
+			ret = mk(`Star t)
+		elif matchc(re, '+')
+			ret = mk(`Plus t)
+		elif matchc(re, '?')
+			ret = mk(`Quest t)
+		else
+			ret = t
+		;;
+		;;
+	other:	-> other;;
+	;;
+	-> `Some ret
+}
+
+const baseexpr = {re
+	var ret
+
+	std.put("-> base\n")
+	if re.pat.len == 0
+		-> `None
+	;;
+	match getc(re)
+	'[':	ret = chrclass(re);;
+	'.':	ret = mk(`Dot);;
+	'^':	ret = mk(`Astart);;
+	'$':	ret = mk(`Aend);;
+	'(':	
+		match altexpr(re)
+		`Some s:	ret = mk(`Cap s);;
+		`None:	-> `Fail (`Emptyparen);;
+		;;
+		if !matchc(re, ')')
+			free(ret)
+			-> `Fail (`Unbalanced)
+		;;
+		;;
+	'*':	-> `Fail (`Badrep);;
+	'+':	-> `Fail (`Badrep);;
+	'?':	-> `Fail (`Badrep);;
+	c:
+		if c == '\\'
+			if re.pat.len == 0
+				-> `Fail (`Earlystop)
+			;;
+			c = getc(re)
+		;;
+		ret = mk(`Chr c)
+		;;
+	;;
+	std.put("<- base\n")
+	dump(ret, 0)
+	-> `Some ret
+}
+
+const chrclass = {pat
+	var t
+
+	t = std.alloc()
+	t# = `Class [][:]
+	-> t
+}
+
+const matchc = {re, c
+	var str
+	var chr
+
+	(chr, str) = std.striter(re.pat)
+	if chr != c
+		-> false
+	;;
+	re.pat = str
+	-> true
+}
+
+const getc = {re
+	var c
+
+	(c, re.pat) = std.striter(re.pat)
+	-> c
+}
+		
+
+const mk = {v
+	var t
+
+	t = std.alloc()
+	t# = v
+	-> t
+}
+
+const free = {t
+	match t#
+	`Alt	(a, b): free(a); free(b);;
+	`Cat	(a, b): free(a); free(b);;
+	/* repetition */
+	`Star	a:	free(a);;
+	`Plus	a:	free(a);;
+	`Quest	a:	free(a);;
+
+	/* end matches */
+	`Class	sl:	std.slfree(sl);;
+	`Byte	b:	;;
+	`Chr	c:	;;
+	`Dot:		;;
+
+	/* meta */
+	`Cap	a:	free(a);;
+	;;
+	std.free(t)
+}
--- a/interp.myr
+++ b/interp.myr
@@ -35,10 +35,10 @@
 	thr = re.thr[tid]
 	str = re.str
 	match re.prog[thr.ip]
-	/* Char matching */
+	/* Char matching. Consume exactly one byte from the string. */
 	`Byte b:
 		if !in(re, str) || b != str[re.strp]
-			kill(re, tid, "not char")
+			kill(re, tid, "not right char")
 		else
 			std.put("matched %b with %b\n", b, str[re.strp])
 		;;
@@ -53,7 +53,10 @@
 			kill(re, tid, "past end")
 		;;
 		;;
-	/* Control flow */
+	/*
+	  Control flow. All of these recursively call step() until
+	  exactly one byte is consumed from the string.
+	 */
 	`Split	(lip, rip):
 		spawn(re, lip)
 		jmp(re, tid, rip)
@@ -91,7 +94,6 @@
 
 	re.thr[re.nthr] = thr
 	re.nthr++
-
 	step(re, tid)
 }
 
--- a/main.myr
+++ b/main.myr
@@ -1,13 +1,13 @@
+use regex
 use std
 
-use regex
-
 const main = {
+	regex.compile("a+")
+	/*
 	var re
 	var found
-
-	re = regex.compile("a+")
 	found = regex.exec(re, "aaa")
 	std.put("Found: len = %z\n", re.strp, found)
+	*/
 }
 
--- a/types.myr
+++ b/types.myr
@@ -1,7 +1,17 @@
 use std
 
 pkg regex =
+	type status = union
+		`Earlystop
+		`Unbalanced
+		`Emptyparen
+		`Badrep
+		`Noimpl
+	;;
+
 	type regex = struct
+		pat	: byte[:]
+		/* VM state */
 		prog	: reinst[:]
 		nthr	: std.size
 		thr	: rethread#[:]