shithub: mc

Download patch

ref: d34c5081eb1ca3b41f2fbf931212941999c93aa7
parent: 6e23df6c7a12011693b61bb5bb86a0a279667d22
author: Ori Bernstein <[email protected]>
date: Fri Jan 31 11:11:16 EST 2014

Add support for reluctant (non-greedy) repetition operators: *? and +?.

--- a/compile.myr
+++ b/compile.myr
@@ -16,7 +16,9 @@
 
 	/* repetition */
 	`Star	tree#
+        `Rstar  tree#
 	`Plus	tree#
+	`Rplus	tree#
 	`Quest	tree#	
 
 	/* end matches */
@@ -91,8 +93,10 @@
 	|`Alt	(a, b): genalt(re, a, b)
 	|`Cat	(a, b): gen(re, a); gen(re, b)
 	/* repetition */
-	|`Star	a:	genstar(re, a)
-	|`Plus	a:	gen(re, a); genstar(re, a)
+	|`Star	a:	genstar(re, a, false)
+	|`Rstar a:	genstar(re, a, true)
+	|`Plus	a:	gen(re, a); genstar(re, a, false)
+	|`Rplus	a:	gen(re, a); genstar(re, a, true)
 	|`Quest	a:	genquest(re, a)
 
 	/* end matches */
@@ -264,7 +268,7 @@
 }
 
 /* generates a repetition operator */
-const genstar = {re, rep
+const genstar = {re, rep, reluct
 	var alt
 	var jmp
 	var l0
@@ -278,7 +282,12 @@
 	l2	= append(re, `Ijmp -1)
 
 
-	re.prog[alt] = `Ifork (l1, l2)
+	/* reluctant matches should prefer jumping to the end. */
+	if reluct
+		re.prog[alt] = `Ifork (l2, l1)
+	else
+		re.prog[alt] = `Ifork (l1, l2)
+	;;
 	re.prog[jmp] = `Ijmp l0
 	-> re.proglen
 }
@@ -374,9 +383,15 @@
 	| `Star	a:
 		std.put("Star\n")
 		dump(re, a, indent + 1)
+	| `Rstar a:
+		std.put("Rstar\n")
+		dump(re, a, indent + 1)
 	| `Plus	a:
 		std.put("Plus\n")
 		dump(re, a, indent + 1)
+	| `Rplus a:
+		std.put("Rplus\n")
+		dump(re, a, indent + 1)
 	| `Quest	a:
 		std.put("Quest\n")
 		dump(re, a, indent + 1)
@@ -467,9 +482,17 @@
 	match baseexpr(re)
 	| `Some t:
 		if matchc(re, '*')
-			ret = mk(`Star t)
+                        if matchc(re, '?')
+                                ret = mk(`Rstar t)
+                        else
+				ret = mk(`Star t)
+			;;
 		elif matchc(re, '+')
-			ret = mk(`Plus t)
+                        if matchc(re, '?')
+				ret = mk(`Rplus t)
+			else
+				ret = mk(`Plus t)
+			;;
 		elif matchc(re, '?')
 			ret = mk(`Quest t)
 		else
@@ -747,12 +770,14 @@
 	| `Cat	(a, b): astfree(a); astfree(b)
 	/* repetition */
 	| `Star	a:	astfree(a)
+	| `Rstar a:	astfree(a)
 	| `Plus	a:	astfree(a)
-	| `Quest	a:	astfree(a)
+	| `Rplus a:	astfree(a)
+	| `Quest a:	astfree(a)
 
 	/* end matches */
 	| `Byte	b:	
-	| `Chr	c:	
+	| `Chr c:	
 	| `Ranges rl:	std.slfree(rl)
 
 	/* meta */
--- a/test/data/regex-basic-expected
+++ b/test/data/regex-basic-expected
@@ -6,3 +6,15 @@
 Matched aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa via (aa|aab?)* : 2
 	match 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 	match 1: aa
+Matched <a foo> blah <bar> via (<.*>).* : 2
+	match 0: <a foo> blah <bar>
+	match 1: <a foo> blah <bar>
+Matched <a foo> blah <bar> via (<.+>).* : 2
+	match 0: <a foo> blah <bar>
+	match 1: <a foo> blah <bar>
+Matched <a foo> blah <bar> via (<.*?>).* : 2
+	match 0: <a foo> blah <bar>
+	match 1: <a foo>
+Matched <a foo> blah <bar> via (<.+?>).* : 2
+	match 0: <a foo> blah <bar>
+	match 1: <a foo>
--- a/test/regex-basic.myr
+++ b/test/regex-basic.myr
@@ -22,4 +22,10 @@
 	testmatch(".*bc", "Abc")
 	testmatch("(a*)*", "a")
 	testmatch("(aa|aab?)*", s)
+        /* greedy matches */
+        testmatch("(<.*>).*", "<a foo> blah <bar>")
+        testmatch("(<.+>).*", "<a foo> blah <bar>")
+        /* reluctant matches */
+        testmatch("(<.*?>).*", "<a foo> blah <bar>")
+        testmatch("(<.+?>).*", "<a foo> blah <bar>")
 }