shithub: mc

Download patch

ref: 750f64d65eaa7af67dc7377ee948dda823c68476
parent: f468624badf571a70dcab63d905a5f0182a2a313
author: Ori Bernstein <[email protected]>
date: Sun Oct 27 09:15:48 EDT 2013

Add support for encoding Unicode range checks.

--- a/compile.myr
+++ b/compile.myr
@@ -73,7 +73,7 @@
 		;;
 	`Byte	b: 	append(re, `Ibyte b);;
 	`Chr	c:	genchar(re, c);;
-	`Dot: 		append(re, `Idot);;
+	`Dot: 		genrange(re, 0, std.Maxcharval);;
 
 	/* meta */
 	`Bol:
@@ -90,6 +90,64 @@
 		;;
 	;;
 	-> re.proglen
+}
+/* Appends to re the instructions that match one encoded character bounded by lo and hi; returns the new re.proglen. */
+const genrange = {re, lo, hi
+	var charrng = [	/* entries 0..3 are the smallest values UTF-8 encodes in 1, 2, 3 and 4 bytes */
+		0,
+		0x80,
+		0x800,
+		0x10000,
+		0x200000,	/* one past the largest value a 4-byte sequence can hold */
+		-1
+	]
+	var lbuf : byte[4]	/* encoded bytes of a class's lower bound */
+	var hbuf : byte[4]	/* encoded bytes of a class's upper bound */
+	var lsz
+	var hsz
+	var end
+	var sz
+	var d
+	var i
+	var j
+
+	lsz = std.charlen(lo)	/* presumably the encoded byte length of lo -- confirm against std.charlen */
+	hsz = std.charlen(hi)	/* presumably the encoded byte length of hi -- confirm against std.charlen */
+	charrng[lsz - 1] = lo	/* the class containing lo now starts at lo */
+	charrng[hsz] = hi	/* NOTE(review): the loop below emits charrng[i + 1] - 1, so this path stops at hi - 1 -- confirm intended */
+	if lsz == 1 && hsz == 1
+		append(re, `Irange (lo castto(byte), hi castto(byte)))	/* both lengths are 1: a single byte-range instruction, with hi passed unchanged */
+	else
+		for i = hsz; i > lsz; i--	/* emits hsz - lsz fork instructions */
+			std.put("i = %z\n", i - 2)	/* NOTE(review): looks like leftover debug output */
+			d = re.proglen + i - 1
+			append(re, `Ifork (re.proglen + 1, jmpdist(i) + d))	/* first target is the next instruction; second is computed from jmpdist */
+		;;
+		end = re.proglen + jmpdist(hsz + 1);	/* jump target shared by every alternative emitted below */
+		for i = 0; i < hsz; i++	/* NOTE(review): starts at 0, not lsz - 1, while only hsz - lsz forks were emitted -- confirm */
+			std.put("lo[%z] = %i\n", i, charrng[i] castto(int))	/* NOTE(review): looks like leftover debug output */
+			std.put("hi[%z] = %i\n", i, (charrng[i + 1] - 1) castto(int))	/* NOTE(review): looks like leftover debug output */
+
+			sz = std.encode(lbuf[:], charrng[i])	/* sz is used below as the number of encoded bytes */
+			std.encode(hbuf[:], charrng[i + 1] - 1)	/* upper bound of this class: one below the next table entry */
+			for j = 0; j < sz; j++
+				append(re, `Irange (lbuf[j], hbuf[j]))	/* one byte-range instruction per encoded byte position */
+			;;
+			append(re, `Ijmp (end))	/* every alternative jumps to the same end address */
+		;;
+	;;
+	-> re.proglen
+}
+/* Returns (n - 1) plus the sum 1 + 2 + ... + (n - 1); genrange uses the result to compute fork and jump targets. */
+const jmpdist = {n
+	var d
+	var i
+
+	d = n - 1	/* starting term; for n = 1 the loop below does not run and 0 is returned */
+	for i = n - 1; i > 0; i--	/* adds n - 1, n - 2, ..., 1 */
+		d += i
+	;;
+	-> d
 }
 
 const genalt = {re, l, r
--- a/main.myr
+++ b/main.myr
@@ -3,9 +3,9 @@
 
 const main = {
 	var found
-	match regex.compile("b*\n^a*")
+	match regex.compile("(.)bc")
 	`std.Success re:
-		found = regex.exec(re, "b\naaa")
+		found = regex.exec(re, "世bc")
 		std.put("Found = %t: len = %z\n", found, re.strp)
 		-> 0
 		;;