ref: ea3c3bcded6031ba1f72e1af6683e0c6bcade96d
parent: cefad2193dc4dfd5a9ddc2f69b38e6cdcddd4cfb
author: Ori Bernstein <[email protected]>
date: Sat Jul 28 12:28:48 EDT 2012
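Decode unicode correctly: mask the lead byte and take 6 bits per continuation byte in str.myr; rename the chartype tables from _name to tabname; add a striter smoke test.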
--- a/chartype.myr
+++ b/chartype.myr
@@ -24,7 +24,7 @@
* alpha ranges -
* only covers ranges not in lower||upper
*/
-const _alpha2 = [
+const tabalpha2 = [
0x00d8, 0x00f6, /* Ø - ö */
0x00f8, 0x01f5, /* ø - ǵ */
0x0250, 0x02a8, /* ɐ - ʨ */
@@ -183,7 +183,7 @@
* alpha singlets -
* only covers ranges not in lower||upper
*/
-const _alpha1 = [
+const tabalpha1 = [
0x00aa, /* ª */
0x00b5, /* µ */
0x00ba, /* º */
@@ -221,7 +221,7 @@
/*
* space ranges
*/
-const _space2 = [
+const tabspace2 = [
0x0009, 0x000a, /* tab and newline */
0x0020, 0x0020, /* space */
0x0085, 0x0085,
@@ -240,7 +240,7 @@
* lower case ranges
* 3rd col is conversion excess 500
*/
-const _toupper2 = [
+const tabtoupper2 = [
0x0061, 0x007a, 468, /* a-z A-Z */
0x00e0, 0x00f6, 468, /* à-ö À-Ö */
0x00f8, 0x00fe, 468, /* ø-þ Ø-Þ */
@@ -282,7 +282,7 @@
* lower case singlets
* 2nd col is conversion excess 500
*/
-const _toupper1 = [
+const tabtoupper1 = [
0x00ff, 621, /* ÿ Ÿ */
0x0101, 499, /* ā Ā */
0x0103, 499, /* ă Ă */
@@ -625,7 +625,7 @@
0x1ff3, 509 /* ῳ ῼ */
]
-const _isdigitr = [
+const tabisdigitr = [
0x0030, 0x0039,
0x0660, 0x0669,
0x06f0, 0x06f9,
@@ -657,7 +657,7 @@
* upper case ranges
* 3rd col is conversion excess 500
*/
-const _tolower2 = [
+const tabtolower2 = [
0x0041, 0x005a, 532, /* A-Z a-z */
0x00c0, 0x00d6, 532, /* À-Ö à-ö */
0x00d8, 0x00de, 532, /* Ø-Þ ø-þ */
@@ -700,7 +700,7 @@
* upper case singlets
* 2nd col is conversion excess 500
*/
-const _tolower1 = [
+const tabtolower1 = [
0x0100, 501, /* Ā ā */
0x0102, 501, /* Ă ă */
0x0104, 501, /* Ą ą */
@@ -1066,11 +1066,11 @@
c = chr castto(int)
if isupper(chr) || islower(chr)
-> true
- elif bsearch(c, _alpha2[0, _alpha2.len], _alpha2.len, 2, &l)
+ elif bsearch(c, tabalpha2[0, tabalpha2.len], tabalpha2.len, 2, &l)
if (c >= l[0] && c <= l[1])
-> true
;;
- elif bsearch(c, _alpha1[0, _alpha1.len], _alpha1.len, 1, &l)
+ elif bsearch(c, tabalpha1[0, tabalpha1.len], tabalpha1.len, 1, &l)
if (c == l[0])
-> true
;;
@@ -1082,7 +1082,7 @@
var l
var c
- if bsearch(c, _isdigitr[0, _isdigitr.len], _isdigitr.len/2, 2, &l)
+ if bsearch(c, tabisdigitr[0, tabisdigitr.len], tabisdigitr.len/2, 2, &l)
if(c >= l[0] && c <= l[1])
-> true
;;
@@ -1096,7 +1096,7 @@
var l
var c
- if bsearch(c, _space2[0,_space2.len], _space2.len/2, 2, &l)
+ if bsearch(c, tabspace2[0,tabspace2.len], tabspace2.len/2, 2, &l)
if(c >= l[0] && c <= l[1])
-> true
;;
@@ -1109,11 +1109,11 @@
var c
c = chr castto(int)
- if bsearch(c, _toupper2[0, _toupper2.len], _toupper2.len, 2, &l)
+ if bsearch(c, tabtoupper2[0, tabtoupper2.len], tabtoupper2.len, 2, &l)
if (c >= l[0] && c <= l[1])
-> true
;;
- elif bsearch(c, _toupper1[0, _toupper1.len], _toupper1.len, 1, &l)
+ elif bsearch(c, tabtoupper1[0, tabtoupper1.len], tabtoupper1.len, 1, &l)
if (c == l[0])
-> true
;;
@@ -1125,11 +1125,11 @@
var l
var c
- if bsearch(c, _tolower2[0, _tolower2.len], _tolower2.len, 2, &l)
+ if bsearch(c, tabtolower2[0, tabtolower2.len], tabtolower2.len, 2, &l)
if (c >= l[0] && c <= l[1])
-> true
;;
- elif bsearch(c, _tolower1[0, _tolower1.len], _tolower1.len, 1, &l)
+ elif bsearch(c, tabtolower1[0, tabtolower1.len], tabtolower1.len, 1, &l)
if (c == l[0])
-> true
;;
--- a/str.myr
+++ b/str.myr
@@ -5,6 +5,9 @@
pkg std =
const Badchar : char = -1 castto(char)
+ const encode : (buf : byte[,], chr : char -> bool)
+ const decode : (str : byte[,] -> char)
+
const striter : (str : byte[,] -> [char, byte[,]])
const strjoin : (lst : byte[,][,], delim:byte[,] -> byte[,])
const strsep : (str : byte[,], delim:byte[,] -> byte[,][,])
@@ -35,11 +38,11 @@
-> (Badchar, str[1,str.len])
;;
- mask = ((1 << (8 - len)) - 1) ^ 0xff
- chr = c castto(uint32)
+ mask = (1 << (7 - len)) - 1
+ chr = (c castto(uint32)) & mask
for i = 1; i < len; i++
- tmp = str[i]
- chr = chr << 8 | tmp castto(uint32)
+ tmp = str[i] castto(uint32)
+ chr = (chr << 6) | (tmp & 0x3f)
;;
-> (chr castto(char), str[len, str.len])
--- a/test.myr
+++ b/test.myr
@@ -22,5 +22,17 @@
for i = 0; i < 1024; i++
std.free(x[i])
;;
+ chartypes()
+
std.write(1, "Hello, 世界\n")
+}
+
+const chartypes = { /* feeds a string mixing multi-byte and ASCII characters through std.striter */
+ var s /* holds the byte slice half of each std.striter result */
+ var c /* holds the char half of each std.striter result */
+
+ s = "世界 123\n"
+ for (c, s) = std.striter(s); s.len != 0; (c, s) = std.striter(s) /* stops once the returned slice is empty */
+ c = c /* no-op body: the decoded char is never inspected or checked */
+ ;;
}