shithub: mc

Download patch

ref: c3a7db11052192c3ae599604e0c3922a28a7f771
parent: 009cb9bf1139d5ddda4e464734f53eeb4ec10d74
author: Ori Bernstein <[email protected]>
date: Thu May 19 18:41:05 EDT 2016

Add tracing of steps when in debug mode.

--- a/lib/regex/compile.myr
+++ b/lib/regex/compile.myr
@@ -85,6 +85,10 @@
 	if re.debug
 		std.htfree(re.astloc)
 		std.slfree(re.pcidx)
+		for bs in re.traces
+			std.bsfree(bs)
+		;;
+		std.slfree(re.traces)
 	;;
 	std.free(re)
 }
@@ -93,13 +97,13 @@
 /* generates bytecode from an AST */
 const gen = {re, t
 	match t#
-	|`Alt	(a, b): genalt(re, a, b)
+	|`Alt	(a, b): genalt(re, a, b, t)
 	|`Cat	(a, b): gen(re, a); gen(re, b)
 	/* repetition */
-	|`Star	a:	genstar(re, a, false)
-	|`Rstar a:	genstar(re, a, true)
-	|`Plus	a:	gen(re, a); genstar(re, a, false)
-	|`Rplus	a:	gen(re, a); genstar(re, a, true)
+	|`Star	a:	genstar(re, a, false, t)
+	|`Rstar a:	genstar(re, a, true, t)
+	|`Plus	a:	gen(re, a); genstar(re, a, false, t)
+	|`Rplus	a:	gen(re, a); genstar(re, a, true, t)
 	|`Quest	a:	genquest(re, a)
 
 	/* end matches */
@@ -290,7 +294,7 @@
 }
 
 /* generates an alternation */
-const genalt = {re, l, r
+const genalt = {re, l, r, t
 	var alt
 	var jmp
 	var l0
@@ -298,10 +302,10 @@
 	var l2
 
 	alt 	= re.proglen
-	l0	= append(re, `Ifork (-1, -1), l) /* needs to be replaced */
+	l0	= append(re, `Ifork (-1, -1), t) /* needs to be replaced */
 		  gen(re, l)
 	jmp	= re.proglen
-	l1 	= append(re, `Ijmp -1, r) /* needs to be replaced */
+	l1 	= append(re, `Ijmp -1, t) /* needs to be replaced */
 	l2	= gen(re, r)
 
 	re.prog[alt] = `Ifork(l0, l1)
@@ -310,7 +314,7 @@
 }
 
 /* generates a repetition operator */
-const genstar = {re, rep, reluct
+const genstar = {re, rep, reluct, t
 	var alt
 	var jmp
 	var l0
@@ -319,9 +323,9 @@
 
 	l0 	= re.proglen
 	alt	= re.proglen
-	l1 	= append(re, `Ifork (-1, -1), rep) /* needs to be replaced */
+	l1 	= append(re, `Ifork (-1, -1), t) /* needs to be replaced */
 	jmp	= gen(re, rep)
-	l2	= append(re, `Ijmp -1, rep)
+	l2	= append(re, `Ijmp -1, t)
 
 
 	/* reluctant matches should prefer jumping to the end. */
@@ -419,6 +423,7 @@
 	for var i = 0; i < indent; i++
 		std.put("  ")
 	;;
+	std.put("{}/", std.htgetv(re.astloc, t, -1))
 	match t#
 	| `Alt	(a, b):
 		std.put("Alt\n")
@@ -454,7 +459,7 @@
 		std.put("Eow\n")
 	/* end matches */
 	| `Chr	c:
-		std.put("Char {}\n", c)
+		std.put("Chr {}\n", c)
 	| `Ranges rl:
                 std.put("Ranges")
 		for r in rl
@@ -495,8 +500,8 @@
 	match catexpr(re)
 	| `Some t:
 		ret = t
+		idx = re.idx
 		if matchc(re, '|')
-			idx = re.idx
 			match altexpr(re)
 			| `Some rhs:
 				ret = mk(re, `Alt (ret, rhs), idx)
@@ -517,9 +522,9 @@
 	var ret
 	var idx
 
+	idx = re.idx
 	match repexpr(re)
 	| `Some t: 
-		idx = re.idx
 		ret = t
 		match catexpr(re)
 		| `Some rhs:
@@ -569,6 +574,7 @@
 	if re.pat.len == re.idx
 		-> `None
 	;;
+	idx = re.idx
 	match peekc(re)
 	/* lower prec operators */
 	| '|':	-> `None
@@ -581,10 +587,9 @@
 	| '$':	getc(re); ret = mk(re, `Eol, re.idx)
 	| '.':	
 		getc(re);
-		ret = mk(re, `Ranges std.sldup([[0, std.Maxcharval]][:]), re.idx)
+		ret = mk(re, `Ranges std.sldup([[0, std.Maxcharval]][:]), idx)
 	| '(':	
 		m = re.nmatch++
-		idx = re.idx
 		getc(re)
 		match altexpr(re)
 		| `Some s:
@@ -604,28 +609,29 @@
 		-> escaped(re)
 	| c:
 		getc(re)
-		ret = mk(re, `Chr c, re.idx)
+		ret = mk(re, `Chr c, idx)
 	;;
 	-> `Some ret
 }
 
 const escaped = {re
-	var ret
+	var ret, idx
 
+	idx = re.idx
 	match getc(re)
 	/* character classes */
-	| 'd': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciidigit[:]), re.idx)
-	| 'x': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciixdigit[:]), re.idx)
-	| 's': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciispace[:]), re.idx)
-	| 'w': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciiword[:]), re.idx)
-	| 'h': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciiblank[:]), re.idx)
+	| 'd': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciidigit[:]), idx)
+	| 'x': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciixdigit[:]), idx)
+	| 's': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciispace[:]), idx)
+	| 'w': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciiword[:]), idx)
+	| 'h': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciiblank[:]), idx)
 
 	/* negated character classes */
-	| 'W': ret = `Some mk(re, `Ranges negate(_ranges.tabasciiword[:]), re.idx)
-	| 'S': ret = `Some mk(re, `Ranges negate(_ranges.tabasciispace[:]), re.idx)
-	| 'D': ret = `Some mk(re, `Ranges negate(_ranges.tabasciidigit[:]), re.idx)
-	| 'X': ret = `Some mk(re, `Ranges negate(_ranges.tabasciixdigit[:]), re.idx)
-	| 'H': ret = `Some mk(re, `Ranges negate(_ranges.tabasciiblank[:]), re.idx)
+	| 'W': ret = `Some mk(re, `Ranges negate(_ranges.tabasciiword[:]), idx)
+	| 'S': ret = `Some mk(re, `Ranges negate(_ranges.tabasciispace[:]), idx)
+	| 'D': ret = `Some mk(re, `Ranges negate(_ranges.tabasciidigit[:]), idx)
+	| 'X': ret = `Some mk(re, `Ranges negate(_ranges.tabasciixdigit[:]), idx)
+	| 'H': ret = `Some mk(re, `Ranges negate(_ranges.tabasciiblank[:]), idx)
 
 	/* unicode character classes */
 	| 'p':	ret = unicodeclass(re, false)
@@ -632,16 +638,16 @@
 	| 'P':  ret = unicodeclass(re, true)
 
 	/* operators that need an escape */
-	| '<': ret = `Some mk(re, `Bow, re.idx)
-	| '>': ret = `Some mk(re, `Eow, re.idx)
+	| '<': ret = `Some mk(re, `Bow, idx)
+	| '>': ret = `Some mk(re, `Eow, idx)
 
 	/* escaped metachars */
-	| '^': ret = `Some mk(re, `Chr '^', re.idx)
-	| '$': ret = `Some mk(re, `Chr '$', re.idx)
-	| '.': ret = `Some mk(re, `Chr '.', re.idx)
-	| '+': ret = `Some mk(re, `Chr '+', re.idx)
-	| '?': ret = `Some mk(re, `Chr '?', re.idx)
-	| '*': ret = `Some mk(re, `Chr '*', re.idx)
+	| '^': ret = `Some mk(re, `Chr '^', idx)
+	| '$': ret = `Some mk(re, `Chr '$', idx)
+	| '.': ret = `Some mk(re, `Chr '.', idx)
+	| '+': ret = `Some mk(re, `Chr '+', idx)
+	| '?': ret = `Some mk(re, `Chr '?', idx)
+	| '*': ret = `Some mk(re, `Chr '*', idx)
 	| chr: ret = `Fail `Badescape chr
 	;;
 	-> ret
@@ -709,8 +715,8 @@
 	var t
 
 	/* we know we saw '[' on entry */
-	matchc(re, '[')
 	idx = re.idx
+	matchc(re, '[')
 	neg = false
 	if matchc(re, '^')
 		neg = true
--- a/lib/regex/interp.myr
+++ b/lib/regex/interp.myr
@@ -60,6 +60,7 @@
 		next = thr.next
 		thrfree(re, thr)
 	;;
+	re.nexttid = 0
 }
 
 const matchfree = {m
@@ -92,6 +93,15 @@
 	bestmatch = Zthr
 	states = std.mkbs()
 	re.runq = mkthread(re, 0)
+	if re.debug
+		/* The last run could have left things here, since we need this info after the run */
+		for bs in re.traces
+			std.bsfree(bs)
+		;;
+		std.slfree(re.traces)
+		re.traces = [][:]
+		std.slpush(&re.traces, std.mkbs())
+	;;
 	re.runq.mstart = std.slalloc(re.nmatch)
 	re.runq.mend = std.slalloc(re.nmatch)
 	for var i = 0; i < re.nmatch; i++
@@ -174,6 +184,7 @@
 		elif b != str[re.strp]
 			die(re, thr, "not right char")
 		else
+			hit(re, thr)
 			thr.ip++
 			trace(re, thr, "\t\tmatched {} with {}\n", b, str[re.strp])
 		;;
@@ -182,6 +193,7 @@
 		if !within(re, str) || start > str[re.strp] || end < str[re.strp]
 			die(re, thr, "bad range")
 		else
+			hit(re, thr)
 			thr.ip++
 		;;
 	/*
@@ -191,6 +203,7 @@
 	| `Ibol:
 		trace(re, thr, "\t{}:\tBol\n", thr.ip)
 		if re.strp == 0 || str[re.strp - 1] == ('\n' : byte)
+			hit(re, thr)
 			thr.ip++
 			-> false
 		else
@@ -199,6 +212,7 @@
 	| `Ieol:
 		trace(re, thr, "\t{}:\tEol\n", thr.ip)
 		if re.strp == str.len || str[re.strp] == ('\n' : byte)
+			hit(re, thr)
 			thr.ip++
 			-> false
 		else
@@ -208,6 +222,7 @@
 	| `Ibow:
 		trace(re, thr, "\t{}:\tBow\n", thr.ip)
 		if iswordchar(str[re.strp:]) && (re.strp == 0 || !iswordchar(prevchar(str, re.strp)))
+			hit(re, thr)
 			thr.ip++
 			-> false
 		else
@@ -216,9 +231,11 @@
 	| `Ieow:
 		trace(re, thr, "\t{}:\tEow\n", thr.ip)
 		if re.strp == str.len && iswordchar(prevchar(str, re.strp))
+			hit(re, thr)
 			thr.ip++
 			-> false
 		elif re.strp > 0 && !iswordchar(str[re.strp:]) && iswordchar(prevchar(str, re.strp))
+			hit(re, thr)
 			thr.ip++
 			-> false
 		else
@@ -228,11 +245,13 @@
 		trace(re, thr, "\t{}:\tLbra {}\n", thr.ip, m)
 		trace(re, thr, "\t\tmatch start = {}\n", re.strp)
 		thr.mstart[m] = re.strp
+		hit(re, thr)
 		thr.ip++
 		-> false
 	| `Irbra m:
 		trace(re, thr, "\t{}:\tRbra {}\n", thr.ip, m)
 		thr.mend[m] = re.strp
+		hit(re, thr)
 		thr.ip++
 		-> false
 	| `Ifork (lip, rip):
@@ -239,15 +258,21 @@
 		trace(re, thr, "\t{}:\tFork ({}, {})\n", thr.ip, lip, rip)
 		mstart = std.sldup(thr.mstart)
 		mend = std.sldup(thr.mend)
+		hit(re, thr)
 		fork(re, thr, rip, curip, mstart, mend)
+		if re.debug
+			std.slpush(&re.traces, std.bsdup(re.traces[thr.tid]))
+		;;
 		thr.ip = lip
 		-> false
 	| `Ijmp ip:
 		trace(re, thr, "\t{}:\tJmp {}\n", thr.ip, ip)
+		hit(re, thr)
 		thr.ip = ip
 		-> false
 	| `Imatch id:
 		trace(re, thr, "\t{}:\tMatch\n", thr.ip)
+		re.lastthr = thr.tid
 		finish(re, thr)
 		-> true
 	;;
@@ -280,6 +305,7 @@
 		re.nthr--
 	;;
 	re.lastip = thr.ip
+	re.lastthr = thr.tid
 	thr.dead = true
 }
 
@@ -289,7 +315,6 @@
 	re.nthr--
 }
 
-var nexttid = 0
 const mkthread = {re, ip
 	var thr : rethread#
 
@@ -298,7 +323,7 @@
 	thr.next = Zthr
 
 	thr.ip = ip
-	thr.tid = nexttid++
+	thr.tid = re.nexttid++
 	thr.dead = false
 	thr.matched = false
 
@@ -334,6 +359,12 @@
 		;;
 		std.put("^\n")
 	;;
+}
+
+const hit = {re, thr
+        if re.debug
+                std.bsput(re.traces[thr.tid], thr.ip)
+        ;;
 }
 
 /* must be called with i >= 1 */
--- a/lib/regex/redump.myr
+++ b/lib/regex/redump.myr
@@ -76,10 +76,13 @@
 		for var i = 1; i < rl.len; i++
 			std.put("\tgroup {}: {}\n", i, rl[i])
 		;;
+		std.put("coverage:\n")
+		std.put("\t{}\n", re.pat)
+		showcoverage(re)
 	| `std.None:
-		std.put("Match failed:\n")
+		std.put("Match failed at {}:\n", re.lastip)
 		std.put("\t{}\n", re.pat)
-		caret(re, re.pcidx[re.lastip] - 1)
+		caret(re, re.pcidx[re.lastip])
 		std.put("\t{}\n", ln)
 		caret(re, re.strp - 1)
 	;;
@@ -93,3 +96,28 @@
 	std.put("^\n")
 }
 
+const showcoverage = {re
+	var hit
+	var idx
+
+	hit = std.slzalloc(re.pat.len)
+	for var ip = 0; ip < re.proglen; ip++
+		if !std.bshas(re.traces[re.lastthr], ip)
+			continue
+		;;
+		idx = re.pcidx[ip]
+		if idx >= 0 && idx < hit.len
+			hit[idx] = true
+		;;
+	;;
+
+	std.put("\t")
+	for h in hit
+		if h
+			std.put("^")
+		else
+			std.put(" ")
+		;;
+	;;
+	std.put("\n")
+}
--- a/lib/regex/test/basic.myr
+++ b/lib/regex/test/basic.myr
@@ -19,21 +19,29 @@
 		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
 		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
 	][:], "")
+	std.put("hi\n")
 	testmatch(".*bc", "Abc", `std.Some [][:])
-	testmatch("(a*)*", "a", `std.Some ["a"][:])
+	std.put("1\n")
+	dbgmatch("(a*)*", "a", `std.Some ["a"][:])
+	std.put("2\n")
 	testmatch("(aa|aab?)*", s, `std.Some ["aa"][:])
+	std.put("3\n")
         /* greedy matches */
 	testmatch("(<.*>).*", "<a foo> blah <bar>", `std.Some [
 			"<a foo> blah <bar>",
 		][:])
+	std.put("3\n")
 	testmatch("(<.+>).*", "<a foo> blah <bar>", `std.Some [
 			"<a foo> blah <bar>",
 		][:])
+	std.put("4\n")
         /* reluctant matches */
 	testmatch("(<.*?>).*", "<a foo> blah <bar>", `std.Some [
 			"<a foo>",
 		][:])
+	std.put("5\n")
 	testmatch("(<.+?>).*", "<a foo> blah <bar>", `std.Some [
 			"<a foo>",
 		][:])
+	std.put("6\n")
 }
--- a/lib/regex/types.myr
+++ b/lib/regex/types.myr
@@ -11,31 +11,6 @@
 		`Badescape char
 	;;
 
-	type ast = union
-		/* basic string building */
-		`Alt	(ast#, ast#)
-		`Cat	(ast#, ast#)
-
-		/* repetition */
-		`Star	ast#
-		`Rstar  ast#
-		`Plus	ast#
-		`Rplus	ast#
-		`Quest	ast#	
-
-		/* end matches */
-		`Chr	char
-		`Ranges	char[2][:]
-
-		/* meta */
-		`Cap	(std.size, ast#) /* id, ast */
-		`Bol	/* beginning of line */
-		`Eol	/* end of line */
-		`Bow	/* beginning of word */
-		`Eow	/* end of word */
-	;;
-
-
 	type regex = struct
 		/* compile state */
 		debug	: bool
@@ -53,11 +28,38 @@
 		nthr	: std.size
 		str	: byte[:]
 		strp	: std.size
+		nexttid : std.size
 
 		/* debug state */
 		astloc	: std.htab(ast#, std.size)#
+		traces	: std.bitset#[:]
 		pcidx	: std.size[:]
 		lastip	: std.size
+		lastthr	: std.size
+	;;
+
+	type ast = union
+		/* basic string building */
+		`Alt	(ast#, ast#)
+		`Cat	(ast#, ast#)
+
+		/* repetition */
+		`Star	ast#
+		`Rstar  ast#
+		`Plus	ast#
+		`Rplus	ast#
+		`Quest	ast#	
+
+		/* end matches */
+		`Chr	char
+		`Ranges	char[2][:]
+
+		/* meta */
+		`Cap	(std.size, ast#) /* id, ast */
+		`Bol	/* beginning of line */
+		`Eol	/* end of line */
+		`Bow	/* beginning of word */
+		`Eow	/* end of word */
 	;;
 
 	pkglocal type rethread = struct