ref: 06d68114701f1c0007f8e4ae23190704126c6773
parent: 533fe80ef60f20eb63691343ef14b568597354f3
author: Ori Bernstein <[email protected]>
date: Mon Dec 30 18:34:05 EST 2013
Add support for negated character classes.
--- a/compile.myr
+++ b/compile.myr
@@ -472,6 +472,61 @@
-> ret
}
+/* parses a bracketed class, e.g. [a-z0-9] or [^a-z]; yields `Some tree
+ * built from the collected ranges, or `Fail (`Earlystop) if the closing
+ * ']' is not matched. negated classes are sorted and merged first. */
+const chrclass = {re
+	var items, merged
+	var negated
+	var tree
+
+	/* the caller only gets here after seeing '[' */
+	matchc(re, '[')
+	negated = matchc(re, '^')
+	items = rangematch(re, [][:])
+	while peekc(re) != ']'
+		items = rangematch(re, items)
+	;;
+	if !matchc(re, ']')
+		std.slfree(items)
+		-> `Fail (`Earlystop)
+	;;
+	if !negated
+		tree = ranges(re, items)
+	else
+		std.sort(items, {a, b;
+			if a[0] < b[0]
+				-> `std.Before
+			elif a[0] == b[0]
+				-> `std.Equal
+			else
+				-> `std.After
+			;;})
+		merged = merge(items)
+		tree = negranges(re, merged)
+		std.slfree(merged)
+	;;
+	std.slfree(items)
+	-> `Some tree
+}
+
+/* takes one class item from re via getc/matchc, either c or c-d, and returns
+ * sl with the pair [lo, hi] pushed onto it; a lone c is stored as [c, c]. */
+const rangematch = {re, sl
+	var first, last
+
+	first = getc(re)
+	last = first
+	if matchc(re, '-')
+		last = getc(re)
+	;;
+	/* a reversed range such as z-a is stored low end first */
+	if last < first
+		-> std.slpush(sl, [last, first])
+	;;
+	-> std.slpush(sl, [first, last])
+}
+
const ranges = {re, rng
var ret
var lhs
@@ -512,40 +567,31 @@
-> neg
}
-const chrclass = {re
- var r
- var t
+/* merges overlapping or abutting ranges of rl (sorted by range start) into a newly built list */
+const merge = {rl
+	var lo, hi
+	var ret
- /* we know we saw '[' on entry */
- matchc(re, '[')
- if matchc(re, '^')
- std.die("negation of character classes not yet supported")
+	if rl.len == 0
+		-> [][:]
;;
- t = rangematch(re)
- while peekc(re) != ']'
- r = rangematch(re)
- t = mk(`Alt (t, r))
+	ret = [][:]
+	lo = rl[0][0]
+	hi = rl[0][1]
+	rl = rl[1:] /* BUG: compiler wants an rval in loop range */
+	for r in rl
+		/* a gap before r closes lo..hi; otherwise only grow hi, since r may lie inside lo..hi */
+		if r[0] > hi + 1
+			ret = std.slpush(ret, [lo, hi])
+			lo = r[0]
+			hi = r[1]
+		elif r[1] > hi
+			hi = r[1]
+		;;
;;
- if !matchc(re, ']')
- astfree(t)
- -> `Fail (`Earlystop)
- else
- -> `Some t
- ;;
+	-> std.slpush(ret, [lo, hi])
}
-const rangematch = {re
- var lo
- var hi
-
- lo = getc(re)
- if matchc(re, '-')
- hi = getc(re)
- -> mk(`Class (lo, hi))
- else
- -> mk(`Chr lo)
- ;;
-}
const matchc = {re, c
var str
--- a/test/tests
+++ b/test/tests
@@ -22,6 +22,7 @@
# evident.
B regex-basic C
B regex-class C
+B regex-negclass C
B regex-capture C
B regex-failmatch C
B regex-unicode C