ref: 95d3a409233b94ff479d8b2529582d144fd172b3
parent: fd6e1d4f4da61b1e6050e12a5d9bf41f422f5d1b
author: Ori Bernstein <[email protected]>
date: Thu Oct 24 14:42:45 EDT 2013
Start working on character ranges.
--- a/compile.myr
+++ b/compile.myr
@@ -73,7 +73,9 @@
;;
`Byte b: append(re, `Ibyte b);;
`Chr c: genchar(re, c);;
- `Dot: append(re, `Idot);;
+ `Dot:
+ genutfrange(re, 0, std.Maxcharval)
+ ;;
/* meta */
`Bol:
@@ -90,6 +92,50 @@
;;
;;
-> re.proglen
+}
+
+/*
+ Appends to `re` the instructions that match one encoded character in
+ the inclusive range [start, end], emitting one `Irange per encoded byte.
+
+ If both bounds encode to the same number of bytes, a single run of byte
+ ranges is emitted. Otherwise one alternative is emitted per encoded
+ length between the two, preceded by the `Ifork instructions that are
+ meant to select between the alternatives.
+
+ Assumes std.encode writes the UTF-8 bytes of its character argument
+ into the given buffer, and that std.charlen returns that byte count.
+
+ NOTE(review): this is still work in progress.
+  - The `Ifork and `Ijmp targets are -1 placeholders; nothing in this
+    function patches them.
+  - Matching every byte position against an independent range only
+    approximates a code point range. For example 0x800..0xffff encodes
+    as e0 a0 80 .. ef bf bf, so the per-byte ranges reject e1 80 80
+    (U+1000) even though it lies inside the range.
+*/
+const genutfrange = {re, start, end
+	/* ranges[n] holds the code points that need n + 1 encoded bytes */
+	var ranges = [
+		(0,0x7f),
+		(0x80,0x7ff),
+		(0x800,0xffff),
+		(0x10000,0x1FFFFF)
+	]
+	var startbuf : byte[4]
+	var endbuf : byte[4]
+	var szstart
+	var szend
+	var i
+	var j
+	var lo
+	var hi
+
+	szstart = std.charlen(start)
+	szend = std.charlen(end)
+	if szstart == szend
+		/*
+		  both bounds have the same encoded length, so no branching
+		  is needed: match position by position. The buffers have to
+		  be filled before they are read.
+		*/
+		std.encode(startbuf[:], start)
+		std.encode(endbuf[:], end)
+		for i = 0; i < szstart; i++
+			append(re, `Irange (startbuf[i], endbuf[i]))
+		;;
+	else
+		/* one fork per extra alternative; targets still to be replaced */
+		for i = 0; i < (szend - szstart); i++
+			append(re, `Ifork (i + 1, -1)) /* replace */
+		;;
+		/*
+		  one alternative per encoded length from szstart to szend
+		  inclusive; lengths are 1-based while `ranges` is 0-based.
+		*/
+		for i = szstart - 1; i < szend; i++
+			(lo, hi) = ranges[i]
+			/* clamp this length's span to the requested bounds */
+			lo = std.max(lo, start)
+			hi = std.min(hi, end)
+			std.encode(startbuf[:], lo)
+			std.encode(endbuf[:], hi)
+			/* characters in ranges[i] occupy i + 1 bytes */
+			for j = 0; j < i + 1; j++
+				append(re, `Irange (startbuf[j], endbuf[j]))
+			;;
+			append(re, `Ijmp -1) /* placeholder: jump past the alternatives */
+		;;
+	;;
}
const genalt = {re, l, r