ref: 4294b05efc28f9f70ae4c9bd6801080dde546c95
parent: 8d54eba9360a95c0ae4a3676fe9bbc9d347b998f
author: Ori Bernstein <[email protected]>
date: Fri Jan 24 18:02:58 EST 2014
Run threads in a deterministic order. This makes sure that we match the correct possibility, and we always pick corectly. Before this change, depending on the string length we would flip the order of the threads, meaning that, for example: (a?)(a*) would match "aaa" as ("a")("aa"), but would match "aaaa" as ("") ("aaaa"). This fix corrects the issue, always greedily matching from left to right.
--- a/interp.myr
+++ b/interp.myr
@@ -10,22 +10,35 @@
const Zthr = 0 castto(rethread#)
const exec = {re, str
+ var thr
+ var m
+
re.str = str
re.strp = 0
- run(re)
- match re.matched
- | `std.Some thr:
- trace(re, thr, "mstart: %z, mend: %z, str.len: %z\n", thr.mstart[0], thr.mend[0], str.len)
- if thr.mend[0] != str.len
- -> `std.None
- else
- -> `std.Some getmatches(re, thr)
- ;;
- | `std.None:
- -> `std.None
+ thr = run(re)
+ if thr != Zthr
+ m = getmatches(re, thr)
+ cleanup(re)
+ -> `std.Some m
+ else
+ cleanup(re)
+ -> `std.None
;;
}
+const cleanup = {re
+ var thr, next
+
+ for thr = re.runq; thr != Zthr; thr = next
+ next = thr.next
+ thrfree(re, thr)
+ ;;
+ for thr = re.expired; thr != Zthr; thr = next
+ next = thr.next
+ thrfree(re, thr)
+ ;;
+}
+
const getmatches = {re, thr
var ret
var i
@@ -41,12 +54,13 @@
-> ret
}
+
+/* returns a matching thread, or Zthr if no threads matched */
const run = {re
var i, ip
var consumed
var thr
- re.matched = `std.None
re.runq = mkthread(re, 0)
re.runq.mstart = std.slalloc(re.nmatch)
re.runq.mend = std.slalloc(re.nmatch)
@@ -69,16 +83,27 @@
if thr.dead
thrfree(re, thr)
+ elif thr.matched && re.strp == re.str.len
+ -> thr
elif !thr.matched
- thr.next = re.expired
- re.expired = thr
+ if re.expired == Zthr
+ re.expired = thr
+ ;;
+ if re.expiredtail != Zthr
+ re.expiredtail.next = thr
+ ;;
+ re.expiredtail = thr
+ thr.next = Zthr
+
;;
;;
trace(re, thr, "SWITCH\n")
re.runq = re.expired
re.expired = Zthr
+ re.expiredtail = Zthr
re.strp++
;;
+ -> Zthr
}
/*
@@ -180,11 +205,6 @@
const finish = {re, thr
trace(re, thr, "finish %z\n", thr.tid)
- match re.matched
- | `std.Some t: thrfree(re, t)
- | `std.None:
- ;;
- re.matched = `std.Some thr
thr.matched = true
re.nthr--
}
--- a/test/data/regex-capture-expected
+++ b/test/data/regex-capture-expected
@@ -8,3 +8,8 @@
match 0: Abcde
match 1: bcd
match 2: c
+Matched. 4 matches
+match 0: aaaa
+match 1: a
+match 2: aaa
+match 3:
--- a/test/regex-capture.myr
+++ b/test/regex-capture.myr
@@ -4,4 +4,5 @@
testmatch("A(.*)", "Abc")
testmatch("A(.*)e", "Abcde")
testmatch("A(b(.*)d)e", "Abcde")
+ testmatch("(a?)(a*)(a?)", "aaaa")
}
--- a/types.myr
+++ b/types.myr
@@ -18,12 +18,12 @@
/* VM state */
runq : rethread#
expired : rethread#
+ expiredtail : rethread#
proglen : std.size
prog : reinst[:]
nthr : std.size
str : byte[:]
strp : std.size
- matched : std.option(rethread#)
;;
type rethread = struct