ref: fb41bc022ada401c69bad8da6802762a7a114803
parent: 89598b02bebd8005be984af85a20e8bbdee9da8a
author: Ori Bernstein <[email protected]>
date: Sun Jan 26 18:55:13 EST 2014
Fix capture groups with '+'. Because of the way we generated the group id when compiling, and the way it interacted with the duplication of the subnodes for '+', we generated two capture groups for a single parenthesized group. This is because '(a)+' became '(a)(a)*'. This change fixes it so that both capture groups, while generated the same way, get the same capture id.
--- a/compile.myr
+++ b/compile.myr
@@ -25,7 +25,7 @@
`Class [char, char]
/* meta */
- `Cap tree#
+ `Cap [std.size, tree#] /* id, tree */
`Bol /* beginning of line */
`Eol /* end of line */
;;
@@ -87,8 +87,6 @@
/* generates bytecode from an AST */
const gen = {re, t
- var m
-
match t#
|`Alt (a, b): genalt(re, a, b)
|`Cat (a, b): gen(re, a); gen(re, b)
@@ -105,8 +103,7 @@
/* meta */
|`Bol: append(re, `Ibol)
|`Eol: append(re, `Ibol)
- |`Cap a:
- m = re.nmatch++
+ |`Cap (m, a):
append(re, `Ilbra m)
gen(re, a)
append(re, `Irbra m)
@@ -325,8 +322,8 @@
std.put("Class (%c-%c)\n", a, b)
/* meta */
- | `Cap a:
- std.put("Cap\n")
+ | `Cap (m, a):
+ std.put("Cap %i\n", m)
dump(re, a, indent + 1)
;;
}
@@ -417,9 +414,9 @@
/* lower prec operators */
| '|': -> `None
| ')': -> `None
- | '*': -> `Fail (`Badrep)
- | '+': -> `Fail (`Badrep)
- | '?': -> `Fail (`Badrep)
+ | '*': -> `Fail `Badrep
+ | '+': -> `Fail `Badrep
+ | '?': -> `Fail `Badrep
| '[': -> chrclass(re)
| '.': getc(re); ret = mk(`Class (0, std.Maxcharval))
| '^': getc(re); ret = mk(`Bol)
@@ -427,17 +424,18 @@
| '(':
getc(re)
match altexpr(re)
- | `Some s: ret = mk(`Cap s)
- | `None: -> `Fail (`Emptyparen)
+ | `Some s:
+ if matchc(re, ')')
+ -> `Some mk(`Cap (re.nmatch++, s))
+ else
+ -> `Fail `Unbalanced
+ ;;
+ | `None: -> `Fail `Emptyparen
;;
- if !matchc(re, ')')
- astfree(ret)
- -> `Fail (`Unbalanced)
- ;;
| '\\':
getc(re) /* consume the slash */
if re.pat.len == 0
- -> `Fail (`Earlystop)
+ -> `Fail `Earlystop
;;
-> escaped(re)
| c:
@@ -700,7 +698,7 @@
| `Class (a, b):
/* meta */
- | `Cap a: astfree(a)
+ | `Cap (m, a): astfree(a)
;;
std.free(t)
}
--- a/test/data/regex-capture-expected
+++ b/test/data/regex-capture-expected
@@ -4,10 +4,13 @@
Matched. 2 matches
match 0: Abcde
match 1: bcd
+Matched. 2 matches
+match 0: abab
+match 1: b
Matched. 3 matches
match 0: Abcde
-match 1: bcd
-match 2: c
+match 1: c
+match 2: bcd
Matched. 4 matches
match 0: aaaa
match 1: a
--- a/test/regex-capture.myr
+++ b/test/regex-capture.myr
@@ -3,6 +3,7 @@
const main = {
testmatch("A(.*)", "Abc")
testmatch("A(.*)e", "Abcde")
+ testmatch("(a|b)+", "abab")
testmatch("A(b(.*)d)e", "Abcde")
testmatch("(a?)(a*)(a?)", "aaaa")
}