shithub: mc

Download patch

ref: 6095d59f1bcd58c59ad3ac4271172c7037c6bda0
parent: ea3c3bcded6031ba1f72e1af6683e0c6bcade96d
author: Ori Bernstein <[email protected]>
date: Sun Jul 29 20:44:43 EDT 2012

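Add more Unicode support: UTF-8 charlen/encode/decode in str.myr, findc (renamed from bsearch) in chartype.myr, and character-class checks in test.myr.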

--- a/bld.sh
+++ b/bld.sh
@@ -46,6 +46,7 @@
     die.myr \
     alloc.myr\
     str.myr \
+    fmt.myr \
     chartype.myr"
 
 OBJ="$(echo $ASM | sed 's/\.s/.o /g') $(echo $MYR | sed 's/\.myr/.o /g')"
@@ -65,4 +66,8 @@
 echo $COMP
 $COMP
 
+build f.myr # run the script's build helper on f.myr (presumably yields the f.o linked below -- confirm)
+COMP="$CC -m32 -o f f.o -L. -lstd"	# link command: f.o plus -lstd (looked up via -L.) into an executable named f
+echo $COMP	# show the command before running it
+$COMP	# unquoted on purpose: the string is word-split into the command and its arguments
 
--- a/chartype.myr
+++ b/chartype.myr
@@ -1036,11 +1036,12 @@
 	0x1ffc, 491	/* ῼ ῳ */
 ]
 
-const bsearch = {c, t, sz, nelt, ret
+const findc = {c, t, sz, nelt, ret
 	var l
 	var m
 
-	while l.len > 1
+	l = t
+	while l.len > nelt
 		m = l.len/2
 		l = t[m+nelt, t.len]
 		if c >= l[0]
@@ -1059,18 +1060,16 @@
 }
 
 
-const isalpha = {chr
+const isalpha = {c
 	var l
-	var c
 
-	c = chr castto(int)
-	if isupper(chr) || islower(chr)
+	if isupper(c) || islower(c)
 		-> true
-	elif bsearch(c, tabalpha2[0, tabalpha2.len], tabalpha2.len, 2, &l)
+	elif findc(c, tabalpha2[0, tabalpha2.len], tabalpha2.len, 2, &l)
 		if (c >= l[0] && c <= l[1])
 			-> true
 		;;
-	elif bsearch(c, tabalpha1[0, tabalpha1.len], tabalpha1.len, 1, &l)
+	elif findc(c, tabalpha1[0, tabalpha1.len], tabalpha1.len, 1, &l)
 		if (c == l[0])
 			-> true
 		;;
@@ -1078,11 +1077,11 @@
 	-> false
 }
 
-const isnum = {chr
+const isnum = {c	/* true when findc succeeds on tabisdigitr and c lies within the [l[0], l[1]] pair it reports; false otherwise */
 	var l
 	var c
 
-	if bsearch(c, tabisdigitr[0, tabisdigitr.len], tabisdigitr.len/2, 2, &l)
+	if findc(c, tabisdigitr[0, tabisdigitr.len], tabisdigitr.len/2, 2, &l)	/* NOTE(review): the 'var c' context line above re-declares the parameter name and is never assigned; presumably this c is that uninitialised local, not the argument -- confirm and drop the local */
 		if(c >= l[0] && c <= l[1])
 			-> true
 		;;
@@ -1089,14 +1088,20 @@
 	;;
 	-> false
 }
-const isalnum = {chr
-	-> isalpha(chr) || isnum(chr)
+
+const isalnum = {c	/* true when either isalpha(c) or isnum(c) is true */
+	-> isalpha(c) || isnum(c)
 }
-const isspace = {chr
+
+const isspace = {c
 	var l
 	var c
+	var sl
+	var len
 
-	if bsearch(c, tabspace2[0,tabspace2.len], tabspace2.len/2, 2, &l)
+	sl = tabspace2[0,tabspace2.len]
+	len = tabspace2.len/2
+	if findc(c, sl, len, 2, &l)
 		if(c >= l[0] && c <= l[1])
 			-> true
 		;;
@@ -1104,16 +1109,15 @@
 	-> false
 }
 
-const islower = {chr
+const islower = {c
 	var l
 	var c
 
-	c = chr castto(int)
-	if bsearch(c, tabtoupper2[0, tabtoupper2.len], tabtoupper2.len, 2, &l)
+	if findc(c, tabtoupper2[0, tabtoupper2.len], tabtoupper2.len, 2, &l)
 		if (c >= l[0] && c <= l[1])
 			-> true
 		;;
-	elif bsearch(c, tabtoupper1[0, tabtoupper1.len], tabtoupper1.len, 1, &l)
+	elif findc(c, tabtoupper1[0, tabtoupper1.len], tabtoupper1.len, 1, &l)
 		if (c == l[0])
 			-> true
 		;;
@@ -1121,15 +1125,15 @@
 	-> false
 }
 
-const isupper = {chr
+const isupper = {c
 	var l
 	var c
 
-	if bsearch(c, tabtolower2[0, tabtolower2.len], tabtolower2.len, 2, &l)
+	if findc(c, tabtolower2[0, tabtolower2.len], tabtolower2.len, 2, &l)
 		if (c >= l[0] && c <= l[1])
 			-> true
 		;;
-	elif bsearch(c, tabtolower1[0, tabtolower1.len], tabtolower1.len, 1, &l)
+	elif findc(c, tabtolower1[0, tabtolower1.len], tabtolower1.len, 1, &l)
 		if (c == l[0])
 			-> true
 		;;
--- a/str.myr
+++ b/str.myr
@@ -5,10 +5,11 @@
 pkg std =
 	const Badchar	: char = -1 castto(char)
 
-	const encode	: (buf : byte[,], chr : char -> bool)
-	const decode	: (str : byte[,] -> char)
-
+	const charlen	: (chr : char -> int)
+	const encode	: (chr : char, buf : byte[,] -> bool)
+	const decode	: (buf : byte[,] -> char)
 	const striter	: (str : byte[,] -> [char, byte[,]])
+
 	const strjoin	: (lst : byte[,][,], delim:byte[,] -> byte[,])
 	const strsep	: (str : byte[,], delim:byte[,] -> byte[,][,])
 	const strbjoin	: (lst : byte[,][,], delim:byte[,] -> byte[,])
@@ -15,6 +16,53 @@
 	const strbsep	: (str : byte[,], delim:byte[,] -> byte[,][,])
 ;;
 
+const charlen = {c	/* number of bytes (1..4) needed to encode c as UTF-8, or -1 once c reaches 0x200000 */
+	if c < 0x80	/* fits in 7 bits */
+		-> 1
+	elif c < 0x800	/* fits in 11 bits */
+		-> 2
+	elif c < 0x10000	/* fits in 16 bits */
+		-> 3
+	elif c < 0x200000	/* fits in 21 bits; NOTE(review): RFC 3629 stops at 0x10ffff and excludes surrogates -- confirm the wider range is intended */
+		-> 4
+	else
+		-> -1	/* too large for a 4-byte sequence */
+	;;
+}
+
+const encode = {c, buf	/* write the UTF-8 encoding of c into the first charlen(c) bytes of buf; true on success, false if c is unencodable or buf is too short */
+	var len
+	var mark
+	var i
+
+	len = charlen(c)	/* bytes required, or -1 */
+	if len < 0 || buf.len < len	/* reject before anything is written to buf */
+		-> false
+	;;
+
+	if (len == 1)
+		mark = 0	/* single-byte form carries no prefix bits */
+	else
+		mark = (((1 << (8 - len)) - 1) ^ 0xff) castto(char)	/* lead-byte prefix with the top len bits set: 0xc0, 0xe0, 0xf0 for len 2, 3, 4 */
+	;;
+
+	for i = len - 1; i > 0; i--	/* fill bytes len-1 down to 1, last byte first */
+		buf[i] = (c & 0x3f | 0x80) castto(byte)	/* low 6 bits of c under the 0b10xx_xxxx tag (assumes & binds tighter than | -- confirm) */
+		c >>= 6	/* drop the bits just stored */
+	;;
+
+	buf[0] = (c | mark) castto(byte)	/* remaining high bits of c combined with the prefix */
+	-> true
+}
+
+const decode = {buf	/* first character of buf as produced by striter; striter yields Badchar for an empty buf */
+	var c
+	var b	/* receives the remaining slice from striter; not used afterwards */
+
+	(c, b) = striter(buf)
+	-> c	/* only the character is returned */
+}
+
 const striter = {str
 	var len
 	var mask
@@ -23,7 +71,11 @@
 	var c
 	var tmp
 
+	if !str.len
+		-> (Badchar, str)
+	;;
 	c = str[0]
+	len = 0
 	if c & 0x80 == 0	/* 0b0xxx_xxxx */
 		len = 1
 	elif c & 0xe0 == 0xc0	/* 0b110x_xxxx */
@@ -36,6 +88,10 @@
 		/* skip one char forward so we can try
 		   resyncing the character stream */
 		-> (Badchar, str[1,str.len])
+	;;
+
+	if len == 0 || len > str.len
+		-> (Badchar, str)
 	;;
 
 	mask = (1 << (7 - len)) - 1
--- a/test.myr
+++ b/test.myr
@@ -2,6 +2,7 @@
 
 const main = {
 	var x : byte*[1024]
+	var buf : byte[1024]
 	var sz
 	var i
 
@@ -22,9 +23,9 @@
 	for i = 0; i < 1024; i++
 		std.free(x[i])
 	;;
-	chartypes()
 
 	std.write(1, "Hello, 世界\n")
+	chartypes()
 }
 
 const chartypes = {
@@ -33,6 +34,12 @@
 
 	s = "世界 123\n"
 	for (c, s) = std.striter(s); s.len != 0; (c, s) = std.striter(s)
-		c = c
+		if std.isspace(c)
+			std.write(1, "Space\n")
+		elif std.isalpha(c)
+			std.write(1, "Alpha\n")
+		elif std.isnum(c)
+			std.write(1, "Num\n")
+		;;
 	;;
 }