shithub: mc

Download patch

ref: 95d3a409233b94ff479d8b2529582d144fd172b3
parent: fd6e1d4f4da61b1e6050e12a5d9bf41f422f5d1b
author: Ori Bernstein <[email protected]>
date: Thu Oct 24 14:42:45 EDT 2013

Start working on character ranges.

--- a/compile.myr
+++ b/compile.myr
@@ -73,7 +73,9 @@
 		;;
 	`Byte	b: 	append(re, `Ibyte b);;
 	`Chr	c:	genchar(re, c);;
-	`Dot: 		append(re, `Idot);;
+	`Dot:
+		genutfrange(re, 0, std.Maxcharval)
+		;;
 
 	/* meta */
 	`Bol:
@@ -90,6 +92,50 @@
 		;;
 	;;
 	-> re.proglen
+}
+
+/*
+  Emits instructions that match one UTF-8 encoded character whose
+  codepoint lies in the inclusive range [start, end].
+
+  re:    the regex being compiled; instructions are appended to it.
+  start: lowest codepoint to match.
+  end:   highest codepoint to match.
+
+  When both bounds encode to the same number of bytes, one byte range
+  is emitted per byte position. Otherwise one alternative is emitted per
+  encoded length, preceded by the forks that select between them.
+
+  NOTE(review): the fork and jump targets are still -1 placeholders and
+  must be patched before the generated program is usable.
+
+  NOTE(review): matching each byte position with an independent range
+  is only exact when the trailing bytes span the whole continuation
+  range; partial ranges (including the 4-byte class clamped to
+  std.Maxcharval) likely need further splitting -- confirm.
+*/
+const genutfrange = {re, start, end
+	/* codepoint bounds for 1, 2, 3 and 4 byte encodings, indexed by length - 1 */
+	var ranges = [
+		(0,0x7f),
+		(0x80,0x7ff),
+		(0x800,0xffff),
+		(0x10000,0x1FFFFF)
+	]
+	var startbuf 	: byte[4]
+	var endbuf 	: byte[4]
+	var szstart
+	var szend
+	var i
+	var j
+	var lo
+	var hi
+
+	szstart = std.charlen(start)
+	szend = std.charlen(end)
+	if szstart == szend
+		/*
+		  both bounds have the same encoded length, so a single
+		  sequence of byte ranges suffices; no branching needed.
+		  The buffers must be filled before they are read.
+		*/
+		std.encode(startbuf[:], start)
+		std.encode(endbuf[:], end)
+		for i = 0; i < szstart; i++
+			append(re, `Irange (startbuf[i], endbuf[i]))
+		;;
+	else
+		/* one fork per extra alternative: szend - szstart + 1 alternatives in total */
+		for i = 0; i < (szend - szstart); i++
+			append(re, `Ifork (i + 1, -1)) /* replace */
+		;;
+		/* ranges[i] describes encodings of i + 1 bytes, so begin at szstart - 1 */
+		for i = szstart - 1; i < szend; i++
+			(lo, hi) = ranges[i]
+			/* clamp this length class to the requested range */
+			lo = std.max(lo, start)
+			hi = std.min(hi, end)
+			std.encode(startbuf[:], lo)
+			std.encode(endbuf[:], hi)
+			/* emit one byte range per encoded byte of this class */
+			for j = 0; j < i + 1; j++
+				append(re, `Irange (startbuf[j], endbuf[j]))
+			;;
+			append(re, `Ijmp -1) /* replace */
+		;;
+	;;
 }
 
 const genalt = {re, l, r