shithub: mc

ref: ce1d62e095493346ea500219a9c11a97562275bb
dir: /str.myr/

View raw version
use "die.use"
use "sys.use"
use "types.use"

pkg std =
	/* sentinel value (-1 cast to char) that nextc returns when it cannot
	   recognize the lead byte of a character */
	const Badchar	: char = -1 castto(char)

	/* reads one character off the front of `str`; returns that character
	   together with the slice that follows it */
	const nextc	: (str : byte[,] -> [char, byte[,]])
	/* NOTE(review): presumably the number of bytes needed to encode `chr`;
	   the definition is not visible in this part of the file -- confirm */
	const chrlen	: (chr : char -> size)
;;

/* utf8 conversion constants: bit masks applied to a byte to classify it.
   The Utf1..Utf4 values are the same masks nextc applies to the lead byte. */
/* NOTE(review): continuation bytes have the form 0b10xxxxxx, which would be
   selected by mask 0xc0 rather than 0xe0 -- confirm this value is intended */
const Utfx = 0xe0	/* mask for "rest" (continuation) bytes */
const Utf1 = 0x80	/* mask for the lead byte of a length 1 char */
const Utf2 = 0xe0	/* mask for the lead byte of a length 2 char */
const Utf3 = 0xf0	/* mask for the lead byte of a length 3 char */
const Utf4 = 0xf8	/* mask for the lead byte of a length 4 char */

/*
 * Decodes the first UTF-8 encoded character of `str`.
 *
 * Returns a tuple of the decoded character and the slice that follows it.
 *
 * Failure cases, all of which yield Badchar as the character:
 *   - empty `str`: the slice is handed back unchanged.
 *   - unrecognized lead byte: one byte is skipped so the caller can try
 *     to resynchronize with the character stream.
 *   - lead byte announcing more bytes than `str` holds: one byte is
 *     skipped, as above.
 *
 * Continuation bytes are not checked for the 0b10xxxxxx form; only their
 * low six bits are used.
 */
const nextc = {str
	var len : size
	var mask
	var chr
	var i
	var c
	var tmp

	/* nothing to decode; indexing str[0] would be out of bounds */
	if str.len == 0
		-> (Badchar, str)
	;;

	/* the high bits of the lead byte encode the sequence length */
	c = str[0]
	if c & 0x80 == 0	/* 0b0xxxxxxx */
		len = 1
	elif c & 0xe0 == 0xc0	/* 0b110xxxxx */
		len = 2
	elif c & 0xf0 == 0xe0	/* 0b1110xxxx */
		len = 3
	elif c & 0xf8 == 0xf0	/* 0b11110xxx */
		len = 4
	else
		/* skip one char forward so we can try
		   resyncing the character stream */
		-> (Badchar, str[1,str.len])
	;;

	/* truncated sequence: the continuation bytes are missing */
	if len > str.len
		-> (Badchar, str[1,str.len])
	;;

	/* keep only the payload bits of the lead byte; the bit just below the
	   length prefix is always zero, so including it in the mask is harmless */
	mask = ((1 << (8 - len)) - 1) castto(uint32)
	chr = (c castto(uint32)) & mask
	for i = 1; i < len; i++
		/* each continuation byte contributes its low six bits */
		tmp = str[i] castto(uint32)
		chr = (chr << 6) | (tmp & 0x3f)
	;;

	-> (chr castto(char), str[len, str.len])
}