shithub: mc

Download patch

ref: a7ba738bca21b54d6cf2bd5cdfe39b00f3d4b350
parent: 2a91805ad7f1a901eae822c58f40e74558bf8ec0
parent: f507c7ee81391e004d492d1095ca4263d3ee1830
author: Ori Bernstein <[email protected]>
date: Tue Oct 22 11:12:37 EDT 2013

Merge branch 'master' of git+ssh://git.eigenstate.org/git/ori/libregex

Conflicts:
	compile.myr
	interp.myr
	main.myr
	types.myr

--- a/compile.myr
+++ b/compile.myr
@@ -40,11 +40,147 @@
 	re = std.zalloc()
 	re.pat = pat
 	match parse(re)
-	`Some t:	dump(t, 0);;
 	`None:		std.put("Empty parse\n");;
 	`Fail f:	-> `std.Failure f;;
+	`Some t:
+		dump(t, 0)
+		gen(re, t)
+		append(re, `Imatch)
+		idump(re)
+		-> `std.Success re
+		;;
 	;;
 	-> `std.Failure (`Noimpl)
+}
+
+const gen = {re, t
+	match t#
+	`Alt	(a, b): genalt(re, a, b);;
+	`Cat	(a, b): gen(re, a); gen(re, b);;
+	/* repetition */
+	`Star	a:	genstar(re, a);;
+	`Plus	a:	gen(re, a); genstar(re, a);;
+	`Quest	a:	genquest(re, a);;
+
+	/* end matches */
+	`Class	sl:
+		std.die("Can't gen class\n")
+		;;
+	`Byte	b: 	append(re, `Ibyte b);;
+	`Chr	c:	genchar(re, c);;
+	`Dot: 		append(re, `Idot);;
+
+	/* meta */
+	`Cap	a:
+		std.put("WARNING: Capture not implemented")
+		gen(re, a)
+		;;
+	;;
+	-> re.proglen
+}
+
+const genalt = {re, l, r
+	var alt
+	var jmp
+	var l0
+	var l1
+	var l2
+
+	alt 	= re.proglen
+	l0	= append(re, `Ifork (-1, -1)) /* needs to be replaced */
+		  gen(re, l)
+	jmp	= re.proglen
+	l1 	= append(re, `Ijmp -1) /* needs to be replaced */
+	l2	= gen(re, l)
+
+	re.prog[alt] = `Ifork(l0, l1)
+	re.prog[jmp] = `Ijmp l2
+	-> re.proglen
+}
+
+const genstar = {re, rep
+	var alt
+	var jmp
+	var l0
+	var l1
+	var l2
+
+	l0 	= re.proglen
+	alt	= re.proglen
+	l1 	= append(re, `Ifork (-1, -1)) /* needs to be replaced */
+	jmp	= gen(re, rep)
+	l2	= append(re, `Ijmp -1)
+
+
+	re.prog[alt] = `Ifork (l1, l2)
+	re.prog[jmp] = `Ijmp l0
+	-> re.proglen
+}
+
+const genquest = {re, q
+	var alt
+	var l0
+	var l1
+
+	alt	= re.proglen
+	l0	= append(re, `Ifork (-1, -1)) /* needs to be replaced */
+	l1	= gen(re, q)
+	re.prog[alt] = `Ifork (l0, l1)
+	-> re.proglen
+}
+
+const genchar = {re, c
+	var b : byte[4]
+	var n
+	var i
+
+	n = std.encode(b[:], c)
+	for i = 0; i < n; i++
+		append(re, `Ibyte b[i])
+	;;
+	-> re.proglen
+}
+
+const append = {re, insn
+	if re.proglen == re.prog.len
+		re.prog = std.slgrow(re.prog, std.max(1, 2*re.proglen))
+	;;
+	re.prog[re.proglen] = insn
+	re.proglen++
+	-> re.proglen
+}
+
+const idump = {re
+	var i
+
+	for i = 0; i < re.proglen; i++
+		std.put("%i:\t", i)
+		match re.prog[i]
+		/* Char matching. Consume exactly one byte from the string. */
+		`Ibyte b: 
+			std.put("`Ibyte %b (%c)\n", b, b castto(char))
+			;;
+		`Irange (start, end):
+			std.put("`Irange (%b,%b)\n", start, end)
+			;;
+		`Idot:
+			std.put("`Idot\n")
+			;;
+		/*
+		Control flow. All of these recursively call step() until
+		exactly one byte is consumed from the string.
+		*/
+		`Ifork	(lip, rip):
+			std.put("`Ifork (%z,%z)\n", lip, rip)
+			;;
+		`Ijmp ip:
+			std.put("`Ijmp %z\n", ip)
+			;;
+		`Imatch:
+			std.put("`Imatch\n")
+			;;
+		;;
+	;;
 }
 
 const dump = {t, indent
--- a/interp.myr
+++ b/interp.myr
@@ -12,13 +12,16 @@
 	re.str = str
 	re.strp = 0
 	re.matched = `std.None
-	spawn(re, 0)
+	mkthread(re, 0)
+	std.put("tid %z, ip %z, strp %z\n", re.thr[0].uid, re.thr[0].ip, re.strp)
 	while re.nthr > 0
 		for i = 0; i < re.nthr; i++
+			std.put("Switch to %i\n", i)
 			std.put("tid %z, ip %z, strp %z\n", re.thr[i].uid, re.thr[i].ip, re.strp)
 			step(re, i)
 		;;
 		if re.nthr > 0
+			std.put("Step forward\n")
 			re.strp++
 		;;
 	;;
@@ -36,19 +39,23 @@
 	str = re.str
 	match re.prog[thr.ip]
 	/* Char matching. Consume exactly one byte from the string. */
-	`Byte b:
-		if !in(re, str) || b != str[re.strp]
+	`Ibyte b:
+		if !in(re, str)
+			kill(re, tid, "end of string")
+		elif b != str[re.strp]
+			std.put("re.strp: %i\n", re.strp)
+			std.put("\tb: %b (%c), str[re.strp]: %b (%c)\n", b, b castto(char), str[re.strp], str[re.strp] castto(char))
 			kill(re, tid, "not right char")
 		else
 			std.put("matched %b with %b\n", b, str[re.strp])
 		;;
 		;;
-	`Range (start, end):
+	`Irange (start, end):
 		if !in(re, str) || start > str[re.strp] || end < str[re.strp]
 			kill(re, tid, "bad range")
 		;;
 		;;
-	`Dot:
+	`Idot:
 		if !in(re, str)
 			kill(re, tid, "past end")
 		;;
@@ -57,14 +64,14 @@
 	  Control flow. All of these recursively call step() until
 	  exactly one byte is consumed from the string.
 	 */
-	`Split	(lip, rip):
+	`Ifork	(lip, rip):
 		spawn(re, lip)
 		jmp(re, tid, rip)
 		;;
-	`Jmp ip:
+	`Ijmp ip:
 		jmp(re, tid, ip)
 		;;
-	`Match:
+	`Imatch:
 		finish(re, tid)
 		;;
 	;;
@@ -78,7 +85,8 @@
 }
 
 var uid = 0
-const spawn = {re, ip
+
+const mkthread = {re, ip
 	var thr : rethread#
 	var tid
 
@@ -94,7 +102,11 @@
 
 	re.thr[re.nthr] = thr
 	re.nthr++
-	step(re, tid)
+	-> tid
+}
+
+const spawn = {re, ip
+	step(re, mkthread(re, ip))
 }
 
 const kill = {re, tid, msg
--- a/main.myr
+++ b/main.myr
@@ -2,12 +2,17 @@
 use std
 
 const main = {
-	regex.compile("a+")
-	/*
-	var re
 	var found
-	found = regex.exec(re, "aaa")
-	std.put("Found: len = %z\n", re.strp, found)
-	*/
+	match regex.compile("foolish")
+	`std.Success re:
+		found = regex.exec(re, "foolish")
+		std.put("Found = %t: len = %z\n", found, re.strp)
+		-> 0
+		;;
+	`std.Failure err:
+		std.put("failed to compile regex")
+		-> 1
+		;;
+	;;
 }
 
--- a/types.myr
+++ b/types.myr
@@ -12,6 +12,7 @@
 	type regex = struct
 		pat	: byte[:]
 		/* VM state */
+		proglen	: std.size
 		prog	: reinst[:]
 		nthr	: std.size
 		thr	: rethread#[:]
@@ -27,13 +28,13 @@
 
 	type reinst = union
 		/* direct consumers */
-		`Byte	byte
-		`Range	[byte, byte]
-		`Dot
+		`Ibyte	byte
+		`Irange	[byte, byte]
+		`Idot
 
-		`Match	/* found the end of the expr */
+		`Imatch	/* found the end of the expr */
 
-		`Split	[std.size, std.size]
-		`Jmp	std.size
+		`Ifork	[std.size, std.size]
+		`Ijmp	std.size
 	;;
 ;;