shithub: mc

Download patch

ref: 64a31ef2fd606a419f9e79ad56509febbff54f17
parent: 595655d194bef8eb4505ffe38af641f78aa157fe
author: Ori Bernstein <[email protected]>
date: Thu Dec 26 19:31:45 EST 2013

Add in loop detection for regexes.

    It used to be that '(a*)*' would make our regex interpreter
    loop indefinitely because we had a path through the automaton
    that would return to the start without consuming any characters.

--- a/interp.myr
+++ b/interp.myr
@@ -15,6 +15,7 @@
 	run(re)
 	match re.matched
 	| `std.Some thr:
+		trace(re, thr, "mstart: %z, mend: %z, str.len: %z\n", thr.mstart[0], thr.mend[0], str.len)
 		if thr.mend[0] != str.len
 			-> `std.None
 		else
@@ -41,7 +42,8 @@
 }
 
 const run = {re
-	var i, consumed
+	var i, ip
+	var consumed
 	var thr
 
 	re.matched = `std.None
@@ -58,10 +60,11 @@
 			thr = re.runq
 			re.runq = thr.next
 
-			trace(re, thr, "\nrunning tid=%z, ip=%z, c=%c\n", thr.tid, thr.ip, re.str[re.strp] castto(char))
-			consumed = step(re, thr)
+			trace(re, thr, "\nrunning tid=%z, ip=%z, s[%z]=%c\n", thr.tid, thr.ip, re.strp, std.decode(re.str[re.strp:]))
+			ip = thr.ip
+			consumed = step(re, thr, -1)
 			while !consumed
-				consumed = step(re, thr)
+				consumed = step(re, thr, ip)
 			;;
 
 			if thr.dead
@@ -71,11 +74,10 @@
 				re.expired = thr
 			;;
 		;;
+		trace(re, thr, "SWITCH\n")
 		re.runq = re.expired
 		re.expired = Zthr
-		if re.nthr > 0
-			re.strp++
-		;;
+		re.strp++
 	;;
 }
 
@@ -83,7 +85,7 @@
  Steps forward one instruction. Returns true if a byte of input was
  consumed, false otherwise.
 */
-const step = {re, thr
+const step = {re, thr, curip
 	var str
 	var mstart
 	var mend
@@ -142,7 +144,7 @@
 		trace(re, thr, "\t%z:\tFork (%z, %z)\n", thr.ip, lip, rip)
 		mstart = std.sldup(thr.mstart)
 		mend = std.sldup(thr.mend)
-		fork(re, thr, rip, mstart, mend)
+		fork(re, thr, rip, curip, mstart, mend)
 		thr.ip = lip
 		-> false
 	| `Ijmp ip:
@@ -157,9 +159,12 @@
 	-> true
 }
 
-const fork = {re, thr, ip, mstart, mend
+const fork = {re, thr, ip, curip, mstart, mend
 	var thr
 
+	if ip == curip /* loop detection */
+		->
+	;;
 	thr = mkthread(re, ip)
 	thr.next = re.runq
 	thr.mstart = mstart
@@ -174,7 +179,7 @@
 }
 
 const finish = {re, thr
-	trace(re, thr, "finish %z\n", tid)
+	trace(re, thr, "finish %z\n", thr.tid)
 	match re.matched
 	| `std.Some t:	thrfree(re, t)
 	| `std.None:
@@ -184,7 +189,7 @@
 	re.nthr--
 }
 
-var tid = 0
+var nexttid = 0
 
 const mkthread = {re, ip
 	var thr : rethread#
@@ -194,7 +199,7 @@
 	thr.next = Zthr
 
 	thr.ip = ip
-	thr.tid = tid++
+	thr.tid = nexttid++
 	thr.dead = false
 	thr.matched = false
 
--- a/test/data/regex-basic-expected
+++ b/test/data/regex-basic-expected
@@ -1,2 +1,5 @@
 Matched. 1 matches
 match 0: Abc
+Matched. 2 matches
+match 0: a
+match 1: a
--- a/test/regex-basic.myr
+++ b/test/regex-basic.myr
@@ -4,4 +4,5 @@
 
 const main = {
 	testmatch(".*bc", "Abc")
+	testmatch("(a*)*", "a")
 }
--- a/test/testmatch.myr
+++ b/test/testmatch.myr
@@ -3,11 +3,20 @@
 
 pkg =
 	const testmatch	: (pat : byte[:], text : byte[:] -> void)
+	const dbgmatch	: (pat : byte[:], text : byte[:] -> void)
 ;;
 
 const testmatch = {pat, text
+	run(regex.compile(pat), text)
+}
+
+const dbgmatch = {pat, text
+	run(regex.dbgcompile(pat), text)
+}
+
+const run = {regex, text
 	var i
-	match regex.compile(pat)
+	match regex
 	| `std.Success re:
 		match regex.exec(re, text)
 		| `std.Some m: