ref: d34c5081eb1ca3b41f2fbf931212941999c93aa7
parent: 6e23df6c7a12011693b61bb5bb86a0a279667d22
author: Ori Bernstein <[email protected]>
date: Fri Jan 31 11:11:16 EST 2014
Add support for reluctant (non-greedy) repetition operators `*?` and `+?`.
--- a/compile.myr
+++ b/compile.myr
@@ -16,7 +16,9 @@
/* repetition */
`Star tree#
+ `Rstar tree#
`Plus tree#
+ `Rplus tree#
`Quest tree#
/* end matches */
@@ -91,8 +93,10 @@
|`Alt (a, b): genalt(re, a, b)
|`Cat (a, b): gen(re, a); gen(re, b)
/* repetition */
- |`Star a: genstar(re, a)
- |`Plus a: gen(re, a); genstar(re, a)
+ |`Star a: genstar(re, a, false)
+ |`Rstar a: genstar(re, a, true)
+ |`Plus a: gen(re, a); genstar(re, a, false)
+ |`Rplus a: gen(re, a); genstar(re, a, true)
|`Quest a: genquest(re, a)
/* end matches */
@@ -264,7 +268,7 @@
}
/* generates a repetition operator */
-const genstar = {re, rep
+const genstar = {re, rep, reluct
var alt
var jmp
var l0
@@ -278,7 +282,12 @@
l2 = append(re, `Ijmp -1)
- re.prog[alt] = `Ifork (l1, l2)
+ /* reluctant matches should prefer jumping to the end. */
+ if reluct
+ re.prog[alt] = `Ifork (l2, l1)
+ else
+ re.prog[alt] = `Ifork (l1, l2)
+ ;;
re.prog[jmp] = `Ijmp l0
-> re.proglen
}
@@ -374,9 +383,15 @@
| `Star a:
std.put("Star\n")
dump(re, a, indent + 1)
+ | `Rstar a:
+ std.put("Rstar\n")
+ dump(re, a, indent + 1)
| `Plus a:
std.put("Plus\n")
dump(re, a, indent + 1)
+ | `Rplus a:
+ std.put("Rplus\n")
+ dump(re, a, indent + 1)
| `Quest a:
std.put("Quest\n")
dump(re, a, indent + 1)
@@ -467,9 +482,17 @@
match baseexpr(re)
| `Some t:
if matchc(re, '*')
- ret = mk(`Star t)
+ if matchc(re, '?')
+ ret = mk(`Rstar t)
+ else
+ ret = mk(`Star t)
+ ;;
elif matchc(re, '+')
- ret = mk(`Plus t)
+ if matchc(re, '?')
+ ret = mk(`Rplus t)
+ else
+ ret = mk(`Plus t)
+ ;;
elif matchc(re, '?')
ret = mk(`Quest t)
else
@@ -747,12 +770,14 @@
| `Cat (a, b): astfree(a); astfree(b)
/* repetition */
| `Star a: astfree(a)
+ | `Rstar a: astfree(a)
| `Plus a: astfree(a)
- | `Quest a: astfree(a)
+ | `Rplus a: astfree(a)
+ | `Quest a: astfree(a)
/* end matches */
| `Byte b:
- | `Chr c:
+ | `Chr c:
| `Ranges rl: std.slfree(rl)
/* meta */
--- a/test/data/regex-basic-expected
+++ b/test/data/regex-basic-expected
@@ -6,3 +6,15 @@
Matched aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa via (aa|aab?)* : 2
match 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
match 1: aa
+Matched <a foo> blah <bar> via (<.*>).* : 2
+ match 0: <a foo> blah <bar>
+ match 1: <a foo> blah <bar>
+Matched <a foo> blah <bar> via (<.+>).* : 2
+ match 0: <a foo> blah <bar>
+ match 1: <a foo> blah <bar>
+Matched <a foo> blah <bar> via (<.*?>).* : 2
+ match 0: <a foo> blah <bar>
+ match 1: <a foo>
+Matched <a foo> blah <bar> via (<.+?>).* : 2
+ match 0: <a foo> blah <bar>
+ match 1: <a foo>
--- a/test/regex-basic.myr
+++ b/test/regex-basic.myr
@@ -22,4 +22,10 @@
testmatch(".*bc", "Abc")
testmatch("(a*)*", "a")
testmatch("(aa|aab?)*", s)
+ /* greedy matches */
+ testmatch("(<.*>).*", "<a foo> blah <bar>")
+ testmatch("(<.+>).*", "<a foo> blah <bar>")
+ /* reluctant matches */
+ testmatch("(<.*?>).*", "<a foo> blah <bar>")
+ testmatch("(<.+?>).*", "<a foo> blah <bar>")
}