shithub: mc

Download patch

ref: ea3c3bcded6031ba1f72e1af6683e0c6bcade96d
parent: cefad2193dc4dfd5a9ddc2f69b38e6cdcddd4cfb
author: Ori Bernstein <[email protected]>
date: Sat Jul 28 12:28:48 EDT 2012

Decode unicode correctly.

--- a/chartype.myr
+++ b/chartype.myr
@@ -24,7 +24,7 @@
  * alpha ranges -
  *	only covers ranges not in lower||upper
  */
-const _alpha2 = [
+const tabalpha2 = [
 	0x00d8,	0x00f6,	/* Ø - ö */
 	0x00f8,	0x01f5,	/* ø - ǵ */
 	0x0250,	0x02a8,	/* ɐ - ʨ */
@@ -183,7 +183,7 @@
  * alpha singlets -
  *	only covers ranges not in lower||upper
  */
-const _alpha1 = [
+const tabalpha1 = [
 	0x00aa,	/* ª */
 	0x00b5,	/* µ */
 	0x00ba,	/* º */
@@ -221,7 +221,7 @@
 /*
  * space ranges
  */
-const _space2 = [
+const tabspace2 = [
 	0x0009,	0x000a,	/* tab and newline */
 	0x0020,	0x0020,	/* space */
 	0x0085, 0x0085,
@@ -240,7 +240,7 @@
  * lower case ranges
  *	3rd col is conversion excess 500
  */
-const _toupper2 = [
+const tabtoupper2 = [
 	0x0061,	0x007a, 468,	/* a-z A-Z */
 	0x00e0,	0x00f6, 468,	/* à-ö À-Ö */
 	0x00f8,	0x00fe, 468,	/* ø-þ Ø-Þ */
@@ -282,7 +282,7 @@
  * lower case singlets
  *	2nd col is conversion excess 500
  */
-const _toupper1 = [
+const tabtoupper1 = [
 	0x00ff, 621,	/* ÿ Ÿ */
 	0x0101, 499,	/* ā Ā */
 	0x0103, 499,	/* ă Ă */
@@ -625,7 +625,7 @@
 	0x1ff3, 509	/* ῳ ῼ */
 ]
 
-const _isdigitr = [
+const tabisdigitr = [
 	0x0030, 0x0039,
 	0x0660, 0x0669,
 	0x06f0, 0x06f9,
@@ -657,7 +657,7 @@
  * upper case ranges
  *	3rd col is conversion excess 500
  */
-const _tolower2 = [
+const tabtolower2 = [
 	0x0041,	0x005a, 532,	/* A-Z a-z */
 	0x00c0,	0x00d6, 532,	/* À-Ö à-ö */
 	0x00d8,	0x00de, 532,	/* Ø-Þ ø-þ */
@@ -700,7 +700,7 @@
  * upper case singlets
  *	2nd col is conversion excess 500
  */
-const _tolower1 = [
+const tabtolower1 = [
 	0x0100, 501,	/* Ā ā */
 	0x0102, 501,	/* Ă ă */
 	0x0104, 501,	/* Ą ą */
@@ -1066,11 +1066,11 @@
 	c = chr castto(int)
 	if isupper(chr) || islower(chr)
 		-> true
-	elif bsearch(c, _alpha2[0, _alpha2.len], _alpha2.len, 2, &l)
+	elif bsearch(c, tabalpha2[0, tabalpha2.len], tabalpha2.len, 2, &l)
 		if (c >= l[0] && c <= l[1])
 			-> true
 		;;
-	elif bsearch(c, _alpha1[0, _alpha1.len], _alpha1.len, 1, &l)
+	elif bsearch(c, tabalpha1[0, tabalpha1.len], tabalpha1.len, 1, &l)
 		if (c == l[0])
 			-> true
 		;;
@@ -1082,7 +1082,7 @@
 	var l
 	var c
 
-	if bsearch(c, _isdigitr[0, _isdigitr.len], _isdigitr.len/2, 2, &l)
+	if bsearch(c, tabisdigitr[0, tabisdigitr.len], tabisdigitr.len/2, 2, &l)
 		if(c >= l[0] && c <= l[1])
 			-> true
 		;;
@@ -1096,7 +1096,7 @@
 	var l
 	var c
 
-	if bsearch(c, _space2[0,_space2.len], _space2.len/2, 2, &l)
+	if bsearch(c, tabspace2[0,tabspace2.len], tabspace2.len/2, 2, &l)
 		if(c >= l[0] && c <= l[1])
 			-> true
 		;;
@@ -1109,11 +1109,11 @@
 	var c
 
 	c = chr castto(int)
-	if bsearch(c, _toupper2[0, _toupper2.len], _toupper2.len, 2, &l)
+	if bsearch(c, tabtoupper2[0, tabtoupper2.len], tabtoupper2.len, 2, &l)
 		if (c >= l[0] && c <= l[1])
 			-> true
 		;;
-	elif bsearch(c, _toupper1[0, _toupper1.len], _toupper1.len, 1, &l)
+	elif bsearch(c, tabtoupper1[0, tabtoupper1.len], tabtoupper1.len, 1, &l)
 		if (c == l[0])
 			-> true
 		;;
@@ -1125,11 +1125,11 @@
 	var l
 	var c
 
-	if bsearch(c, _tolower2[0, _tolower2.len], _tolower2.len, 2, &l)
+	if bsearch(c, tabtolower2[0, tabtolower2.len], tabtolower2.len, 2, &l)
 		if (c >= l[0] && c <= l[1])
 			-> true
 		;;
-	elif bsearch(c, _tolower1[0, _tolower1.len], _tolower1.len, 1, &l)
+	elif bsearch(c, tabtolower1[0, tabtolower1.len], tabtolower1.len, 1, &l)
 		if (c == l[0])
 			-> true
 		;;
--- a/str.myr
+++ b/str.myr
@@ -5,6 +5,9 @@
 pkg std =
 	const Badchar	: char = -1 castto(char)
 
+	const encode	: (buf : byte[,], chr : char -> bool)
+	const decode	: (str : byte[,] -> char)
+
 	const striter	: (str : byte[,] -> [char, byte[,]])
 	const strjoin	: (lst : byte[,][,], delim:byte[,] -> byte[,])
 	const strsep	: (str : byte[,], delim:byte[,] -> byte[,][,])
@@ -35,11 +38,11 @@
 		-> (Badchar, str[1,str.len])
 	;;
 
-	mask = ((1 << (8 - len)) - 1) ^ 0xff
-	chr = c castto(uint32)
+	mask = (1 << (7 - len)) - 1
+	chr = (c castto(uint32)) & mask
 	for i = 1; i < len; i++
-		tmp = str[i]
-		chr = chr << 8 | tmp castto(uint32)
+		tmp = str[i] castto(uint32)
+		chr = (chr << 6) | (tmp & 0x3f)
 	;;
 
 	-> (chr castto(char), str[len, str.len])
--- a/test.myr
+++ b/test.myr
@@ -22,5 +22,17 @@
 	for i = 0; i < 1024; i++
 		std.free(x[i])
 	;;
+	chartypes()
+
 	std.write(1, "Hello, 世界\n")
+}
+
+const chartypes = {	/* smoke test: walks a mixed CJK/ASCII string through std.striter */
+	var s
+	var c
+
+	s = "世界 123\n"
+	for (c, s) = std.striter(s); s.len != 0; (c, s) = std.striter(s)	/* each step takes one char off the front of s; loop ends once the remainder is empty */
+		c = c	/* no-op: the decoded char is not inspected or compared here */
+	;;
 }