ref: 336413341a83ffd0294b16e2debe1ec5132fa19b
parent: a849073c6d1c7174ba648155bcca935a00faab0f
author: Ori Bernstein <[email protected]>
date: Tue May 24 20:07:06 EDT 2016
Add more string split utility functions.
--- a/lib/std/strsplit.myr
+++ b/lib/std/strsplit.myr
@@ -10,40 +10,78 @@
pkg std =
+ /* split `s` at every occurrence of `delim`; the result slice is grown as needed */
const strsplit : (s : byte[:], delim : byte[:] -> byte[:][:])
+ /* like strsplit, but the pieces are stored in the caller-provided buffer `sp` */
+ const bstrsplit : (sp : byte[:][:], s : byte[:], delim : byte[:] -> byte[:][:])
+ /* split `s` into tokens separated by whitespace; the result slice is grown as needed */
const strtok : (s : byte[:] -> byte[:][:])
+ /* like strtok, but the tokens are stored in the caller-provided buffer `sp` */
+ const bstrtok : (sp : byte[:][:], s : byte[:] -> byte[:][:])
;;
extern const put : (fmt : byte[:], args : ... -> size)
+/*
+ * Splits `s` at each occurrence of `delim` and returns the pieces.
+ * The result slice starts out empty and is filled by dostrsplit with
+ * grow = true, so pieces are appended with slpush.
+ */
const strsplit = {s, delim
- var last
var sp
sp = [][:]
+ -> dostrsplit(&sp, s, delim, true)
+}
+
+/*
+ * Buffered variant of strsplit: the pieces of `s` are written into the
+ * caller-provided buffer `sp` instead of a grown slice. At most sp.len
+ * pieces are produced; once a single slot is left, the rest of `s`
+ * (delimiters included) is stored in it unsplit. Returns the filled
+ * prefix of `sp`.
+ */
+const bstrsplit = {sp, s, delim
+ /* a zero-length buffer cannot hold any piece: return it rather than index into it */
+ if sp.len == 0
+ -> sp
+ ;;
+ -> dostrsplit(&sp, s, delim, false)
+}
+
+/*
+ * Shared implementation of strsplit and bstrsplit.
+ *
+ * sp:    pointer to the output slice
+ * s:     string to split
+ * delim: separator searched for with strfind
+ * grow:  when true, pieces are appended to sp# with slpush; when false,
+ *        pieces are stored into the existing slots of sp# and splitting
+ *        stops once one slot is left, which receives the rest of `s`.
+ *
+ * Returns sp#[:idx], where idx is the number of pieces stored. An empty
+ * `s` yields no pieces, and an empty fixed buffer is returned empty.
+ */
+const dostrsplit : (sp : byte[:][:]#, s : byte[:], delim : byte[:], grow : bool -> byte[:][:]) = {sp : byte[:][:]#, s, delim, grow
+ var idx
+
+ idx = 0
+
if s.len == 0
- -> sp
+ -> sp#[:idx]
;;
- last = 0
+
while true
match strfind(s, delim)
| `Some i:
- slpush(&sp, s[:i])
+ /* idx + 1 < len, not idx < len - 1: the subtraction would wrap for an empty buffer */
+ if grow
+ slpush(sp, s[:i])
+ elif idx + 1 < sp#.len
+ sp#[idx] = s[:i]
+ else
+ goto donesplit
+ ;;
s = s[i + delim.len:]
+ idx++
| `None:
goto donesplit
;;
;;
:donesplit
- slpush(&sp, s[:])
- -> sp
+ /* store the remainder; in buffered mode only if a slot is actually free */
+ if grow
+ slpush(sp, s)
+ idx++
+ elif idx < sp#.len
+ sp#[idx] = s
+ idx++
+ ;;
+ -> sp#[:idx]
}
+/*
+ * Splits `s` into tokens and returns them. The result slice starts out
+ * empty and is filled by dostrtok with grow = true, so tokens are
+ * appended with slpush.
+ */
const strtok = {s
- var i, j
var toks
+ toks = [][:]
+ -> dostrtok(&toks, s, true)
+}
+
+/*
+ * Buffered variant of strtok: tokens are written into the caller-provided
+ * buffer `toks` instead of a grown slice. At most toks.len entries are
+ * produced; once a single slot is left, it receives the rest of `s`
+ * starting at the current token, unsplit. Returns the filled prefix of
+ * `toks`.
+ */
+const bstrtok = {toks, s
+ /* a zero-length buffer cannot hold any token: return it rather than index into it */
+ if toks.len == 0
+ -> toks
+ ;;
+ -> dostrtok(&toks, s, false)
+}
+
+/*
+ * Shared implementation of strtok and bstrtok.
+ *
+ * toks: pointer to the output slice
+ * s:    string to tokenize; leading whitespace before each token is skipped
+ * grow: when true, tokens are appended to toks# with slpush; when false,
+ *       tokens are stored into the existing slots of toks#, and the last
+ *       free slot receives the rest of `s` from the current token onward.
+ *
+ * Returns toks#[:idx], where idx is the number of entries stored. An
+ * empty fixed buffer is returned empty.
+ */
+const dostrtok = {toks, s, grow
+ var i, j
+ var idx
+
i = 0
- toks = [][:]
+ idx = 0
while i != s.len
while isspace(std.decode(s[i:])) && i < s.len
i++
@@ -53,9 +91,22 @@
j++
;;
if i != j
- slpush(&toks, s[i:j])
+ /* idx + 1 < len, not idx < len - 1: the subtraction would wrap for an empty buffer */
+ if grow
+ slpush(toks, s[i:j])
+ elif idx + 1 < toks#.len
+ toks#[idx] = s[i:j]
+ else
+ /* the last free slot (if any) takes the unsplit remainder, then stop */
+ if idx < toks#.len
+ toks#[idx] = s[i:]
+ idx++
+ ;;
+ break
+ ;;
+ idx++
;;
i = j
;;
- -> toks
+ -> toks#[:idx]
}
--- a/lib/std/test/strsplit.myr
+++ b/lib/std/test/strsplit.myr
@@ -1,13 +1,32 @@
use std
const main = {
+ /* scratch buffer for the buffered variants; six slots bound the number of pieces */
+ var b : byte[:][6]
+
+ /* dynamic str split */
check(std.strsplit("", ","), [][:])
check(std.strsplit("a,b,c ,,d,", ","), \
["a", "b", "c ", "", "d", ""][:])
+ /* multi-byte delimiter: scanning must resume delim.len bytes past each match */
+ check(std.strsplit("a::b::c", "::"), \
+ ["a", "b", "c"][:])
+
+ /* buffered str split */
+ check(std.bstrsplit(b[:], "a,b", ","), \
+ ["a", "b"][:])
+ check(std.bstrsplit(b[:], "a,b,c ,,d,", ","), \
+ ["a", "b", "c ", "", "d", ""][:])
+ /* more pieces than slots: the last slot keeps the unsplit remainder */
+ check(std.bstrsplit(b[:], "a,b,c,d,e,f,g,h", ","), \
+ ["a", "b", "c", "d", "e", "f,g,h",][:])
+
+ /* tokenizing */
check(std.strtok(""), [][:])
check(std.strtok(" "), [][:])
check(std.strtok("\t"), [][:])
check(std.strtok("a b c\td"), ["a", "b", "c", "d"][:])
+
+ /* buffered tokenizing */
+ check(std.bstrtok(b[:], "a b c\td"), ["a", "b", "c", "d"][:])
+ /* two slots only: the second keeps the rest of the input untokenized */
+ check(std.bstrtok(b[:2], "a b c\td"), ["a", "b c\td"][:])
}
+/*
+ * Test helper: walks `a` and compares each a[i] with b[i] using std.sleq,
+ * aborting through std.fatal on the first mismatch. The lines between the
+ * header and the loop are outside this hunk; presumably they compare the
+ * two lengths first -- confirm against the full file.
+ */
const check = {a, b
@@ -17,7 +36,7 @@
;;
for var i = 0; i < a.len; i++
if !std.sleq(a[i], b[i])
- std.fatal("element {} mismatched: {} != {}\n", i, a[i], b[i])
+ /* quote both values so empty or whitespace-only strings are visible in the message */
+ std.fatal("element {} mismatched: '{}' != '{}'\n", i, a[i], b[i])
;;
;;
}