shithub: mc

Download patch

ref: 336413341a83ffd0294b16e2debe1ec5132fa19b
parent: a849073c6d1c7174ba648155bcca935a00faab0f
author: Ori Bernstein <[email protected]>
date: Tue May 24 20:07:06 EDT 2016

Add more string split utility functions.

--- a/lib/std/strsplit.myr
+++ b/lib/std/strsplit.myr
@@ -10,40 +10,78 @@
 
 pkg std =
 	const strsplit	: (s : byte[:], delim : byte[:] -> byte[:][:])
+	const bstrsplit	: (sp : byte[:][:], s : byte[:], delim : byte[:] -> byte[:][:]) /* like strsplit, but stores the pieces in the caller-supplied sp */
 	const strtok	: (s : byte[:] -> byte[:][:])
+	const bstrtok	: (sp : byte[:][:], s : byte[:] -> byte[:][:]) /* like strtok, but stores the tokens in the caller-supplied sp */
 ;;
 
 extern const put	: (fmt : byte[:], args : ... -> size)
 
 const strsplit = {s, delim
-	var last
 	var sp
 
 	sp = [][:]
+	-> dostrsplit(&sp, s, delim, true) /* grow=true: dostrsplit pushes every piece onto sp with slpush */
+}
+
+const bstrsplit = {sp, s, delim /* splits s on delim into the caller-supplied slice sp */
+	-> dostrsplit(&sp, s, delim, false) /* grow=false: pieces are assigned into sp's existing slots, never pushed */
+}
+
+const dostrsplit : (sp : byte[:][:]#, s : byte[:], delim : byte[:], grow : bool -> byte[:][:]) = {sp : byte[:][:]#, s, delim, grow /* shared worker for strsplit (grow=true) and bstrsplit (grow=false) */
+	var last /* NOTE(review): assigned 0 below but not read anywhere in this body */
+	var idx /* number of pieces stored so far; also the length of the returned slice */
+
+	last = 0
+	idx = 0
+
 	if s.len == 0
-		-> sp
+		-> sp#[:idx] /* idx is still 0 here, so empty input yields an empty slice */
 	;;
-	last = 0
+
 	while true
 		match strfind(s, delim)
 		| `Some i:
-			slpush(&sp, s[:i])
+			if grow
+				slpush(sp, s[:i]) /* growing mode: push the text before the delimiter */
+			elif idx < sp#.len - 1 /* buffered mode: the last slot is kept free for the remainder */
+				sp#[idx] = s[:i]
+			else
+				goto donesplit /* only the last slot is left: stop splitting so the rest of s goes there */
+			;;
 			s = s[i + delim.len:]
+			idx++
 		| `None:
 			goto donesplit
 		;;
 	;;
 :donesplit
-	slpush(&sp, s[:])
-	-> sp
+	if grow
+		slpush(sp, s) /* whatever remains of s is the final piece */
+	else
+		sp#[idx] = s /* NOTE(review): nothing here checks idx < sp#.len, so an empty buffer is not guarded — confirm callers never pass one */
+	;;
+	idx++ /* count the final piece */
+	-> sp#[:idx]
 }
 
 const strtok = {s
-	var i, j
 	var toks
+	toks = [][:] /* start from an empty slice; dostrtok pushes tokens onto it */
 
+	-> dostrtok(&toks, s, true) /* grow=true selects the slpush branch in dostrtok */
+}
+
+const bstrtok = {toks, s /* tokenizes s into the caller-supplied slice toks */
+	-> dostrtok(&toks, s, false) /* grow=false: tokens are assigned into toks' slots; the last slot receives the untokenized rest of s */
+}
+
+const dostrtok = {toks, s, grow
+	var i, j
+	var idx
+
 	i = 0
-	toks = [][:]
+	idx = 0
 	while i != s.len
 		while isspace(std.decode(s[i:])) && i < s.len
 			i++
@@ -53,9 +91,18 @@
 			j++
 		;;
 		if i != j
-			slpush(&toks, s[i:j])
+			if grow
+				slpush(toks, s[i:j])
+			elif idx < toks#.len - 1
+				toks#[idx] = s[i:j]
+			else
+				toks#[idx] = s[i:]
+				idx++
+				break
+			;;
+			idx++
 		;;
 		i = j
 	;;
-	-> toks
+	-> toks#[:idx]
 }
--- a/lib/std/test/strsplit.myr
+++ b/lib/std/test/strsplit.myr
@@ -1,13 +1,32 @@
 use std
 
 const main = {
+	var b : byte[:][6] /* six-slot buffer reused by the bstrsplit and bstrtok checks below */
+
+	/* dynamic str split: strsplit builds and returns its own result slice */
 	check(std.strsplit("", ","), [][:])
 	check(std.strsplit("a,b,c ,,d,", ","), \
 		["a", "b", "c ", "", "d", ""][:])
+	check(std.strsplit("a,b,c ,,d,", ","), \
+		["a", "b", "c ", "", "d", ""][:]) /* NOTE(review): same input and expectation as the check just above */
+
+	/* buffered str split: pieces are written into b */
+	check(std.bstrsplit(b[:], "a,b", ","), \
+		["a", "b"][:])
+	check(std.bstrsplit(b[:], "a,b,c ,,d,", ","), \
+		["a", "b", "c ", "", "d", ""][:])
+	check(std.bstrsplit(b[:], "a,b,c,d,e,f,g,h", ","), \
+		["a", "b", "c", "d", "e", "f,g,h",][:]) /* more pieces than slots: the last slot holds the unsplit tail */
+
+	/* tokenizing: whitespace-only and empty inputs produce no tokens */
 	check(std.strtok(""), [][:])
 	check(std.strtok(" "), [][:])
 	check(std.strtok("\t"), [][:])
 	check(std.strtok("a b  c\td"), ["a", "b", "c", "d"][:])
+
+	/* buffered tokenizing: tokens are written into b */
+	check(std.bstrtok(b[:], "a b  c\td"), ["a", "b", "c", "d"][:])
+	check(std.bstrtok(b[:2], "a b  c\td"), ["a", "b  c\td"][:]) /* two-slot buffer: the second slot gets the rest of the string */
 }
 
 const check = {a, b
@@ -17,7 +36,7 @@
 	;;
 	for var i = 0; i < a.len; i++
 		if !std.sleq(a[i], b[i])
-			std.fatal("element {} mismatched: {} != {}\n", i, a[i], b[i])
+			std.fatal("element {} mismatched: '{}' != '{}'\n", i, a[i], b[i])
 		;;
 	;;
 }