shithub: mc

Download patch

ref: 167228d8551ba8b4dc282489258ad2e50363a56f
parent: 31247e29fb596b44fdd5c18140bc945b6a4b71ce
author: Ori Bernstein <[email protected]>
date: Mon May 23 10:17:36 EDT 2016

add substitution for full matches.

--- a/lib/regex/interp.myr
+++ b/lib/regex/interp.myr
@@ -5,6 +5,8 @@
 pkg regex =
 	const exec	: (re : regex#, str : byte[:] -> std.option(byte[:][:]))
 	const search	: (re : regex#, str : byte[:] -> std.option(byte[:][:]))
+	const sub	: (re : regex#, str : byte[:], subst : byte[:][:] -> std.option(byte[:]))
+	const sbsub	: (sb : std.strbuf#, re : regex#, str : byte[:], subst : byte[:][:] -> bool)
 	const matchfree	: (pat : byte[:][:] -> void)
 ;;
 
@@ -40,6 +42,53 @@
 		cleanup(re)
 	;;
 	->  m
+}
+
+/* returns the substituted copy of str built by sbsub, or `std.None if sbsub reports failure */
+const sub = {re, str, subst
+	var sb = std.mksb()
+
+	if sbsub(sb, re, str, subst)
+		-> `std.Some std.sbfin(sb)
+	;;
+	std.sbfree(sb)	/* nothing was produced: release the buffer instead of leaking it */
+	-> `std.None
+}
+
+/* writes the substituted str into sb; false (sb untouched) if subst.len + 1 != re.nmatch or run yields Zthr */
+const sbsub = {sb, re, str, subst
+	var thr, ok
+
+	/* group 0 is the full match, so exactly one replacement per remaining group is required */
+	if subst.len + 1 != re.nmatch
+		-> false
+	;;
+
+	re.str = str
+	re.strp = 0
+	ok = false
+	thr = run(re, true)
+	if thr != Zthr
+		ok = dosubst(sb, re, thr, subst)
+	;;
+	cleanup(re)
+	-> ok
+}
+
+/* copies re.str into sb, swapping each matched group i for subst[i - 1]; frees thr and returns true */
+const dosubst = {sb, re, thr, subst
+	var off = 0
+	for var i = 1; i < re.nmatch; i++
+		/* skip unset groups, and groups starting before off (e.g. nested in one already replaced) */
+		if thr.mstart[i] != -1 && thr.mend[i] != -1 && thr.mstart[i] >= off
+			std.sbputs(sb, re.str[off:thr.mstart[i]])
+			std.sbputs(sb, subst[i - 1])
+			off = thr.mend[i]
+		;;
+	;;
+	std.sbputs(sb, re.str[off:])	/* tail after the last replaced group */
+	thrfree(re, thr)
+	-> true
 }
 
 const cleanup = {re
--- a/lib/regex/test/basic.myr
+++ b/lib/regex/test/basic.myr
@@ -19,27 +19,20 @@
 		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
 		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
 	][:], "")
-	std.put("hi\n")
 	testmatch(".*bc", "Abc", `std.Some [][:])
-	std.put("1\n")
-	dbgmatch("(a*)*", "a", `std.Some ["a"][:])
-	std.put("2\n")
+	testmatch("(a*)*", "a", `std.Some ["a"][:])
 	testmatch("(aa|aab?)*", s, `std.Some ["aa"][:])
-	std.put("3\n")
         /* greedy matches */
 	testmatch("(<.*>).*", "<a foo> blah <bar>", `std.Some [
 			"<a foo> blah <bar>",
 		][:])
-	std.put("3\n")
 	testmatch("(<.+>).*", "<a foo> blah <bar>", `std.Some [
 			"<a foo> blah <bar>",
 		][:])
-	std.put("4\n")
         /* reluctant matches */
 	testmatch("(<.*?>).*", "<a foo> blah <bar>", `std.Some [
 			"<a foo>",
 		][:])
-	std.put("5\n")
 	testmatch("(<.+?>).*", "<a foo> blah <bar>", `std.Some [
 			"<a foo>",
 		][:])
--- a/lib/regex/test/bld.sub
+++ b/lib/regex/test/bld.sub
@@ -55,3 +55,11 @@
 	lib @/lib/sys:sys
 	lib @/lib/regex:regex
 ;;
+
+test subst  =
+	subst.myr
+	testmatch.myr
+	lib @/lib/std:std
+	lib @/lib/sys:sys
+	lib @/lib/regex:regex
+;;
--- /dev/null
+++ b/lib/regex/test/subst.myr
@@ -1,0 +1,52 @@
+use std
+
+use "testmatch"
+
+/* drives testsub over plain, repeated, greedy, reluctant, multi-group and non-matching patterns */
+const main = {
+	var text : byte[:]
+	var want : byte[:]
+	text = std.strjoin([
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+	][:], "")
+	want = std.strcat(text[:text.len - 2], "XYZ")
+
+	testsub(".*bc", "Abc", [][:], `std.Some "Abc")
+	testsub("(a*)*", "a", ["b"][:], `std.Some "b")
+	testsub("(aa|aab?)*", text, ["XYZ"][:], `std.Some want)
+
+	/* greedy group: the whole input is replaced */
+	testsub("(<.*>).*", \
+		"<a foo> blah <bar>", \
+		["some shite"][:], \
+		`std.Some "some shite")
+
+	/* reluctant group: only the first tag is replaced */
+	testsub("(<.*?>).*", \
+		"<a foo> blah <bar>", \
+		["<some tag>"][:], \
+		`std.Some "<some tag> blah <bar>")
+
+	/* two groups, two replacements */
+	testsub("([a-z]*)[^a-z]*([a-z]*)foo", \
+		"abc123foofoo", \
+		["XYZ", "ABC"][:], \
+		`std.Some "XYZ123ABCfoo")
+
+	/* no match: no substitution is expected */
+	testsub("([a-z]*).*([a-z]*)foo", \
+		"abc123foobar", \
+		["XYZ", "ABC"][:], \
+		`std.None)
+}
--- a/lib/regex/test/testmatch.myr
+++ b/lib/regex/test/testmatch.myr
@@ -2,9 +2,30 @@
 use regex
 
 pkg =
-	const testmatch	: (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
-	const testsearch	: (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
-	const dbgmatch	: (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
+	/* match drivers: pattern, subject text, and the optional capture list to compare against */
+	const testmatch	: ( \
+		pat : byte[:], \
+		text : byte[:], \
+		expected : std.option(byte[:][:]) \
+		-> void)
+	const testsearch	: ( \
+		pat : byte[:], \
+		text : byte[:], \
+		expected : std.option(byte[:][:]) \
+		-> void)
+	const dbgmatch	: ( \
+		pat : byte[:], \
+		text : byte[:], \
+		expected : std.option(byte[:][:]) \
+		-> void)
+
+	/* substitution driver: sub lists the replacement strings, expected is the rewritten text */
+	const testsub	: ( \
+		pat : byte[:], \
+		text : byte[:], \
+		sub : byte[:][:], \
+		expected : std.option(byte[:]) \
+		-> void)
 ;;
 
 const testmatch = {pat, text, expected
@@ -15,8 +36,36 @@
 	run(regex.compile(pat), pat, text, expected, true)
 }
 
+const testsub = {pat, text, sub, expected
+	subst(regex.compile(pat), pat, text, sub, expected)	/* subst unwraps the compile result and checks regex.sub's output */
+}
+
 const dbgmatch = {pat, text, expected
 	run(regex.dbgcompile(pat, true), pat, text, expected, false)
+}
+
+/* runs regex.sub(re, text, sub) on the unwrapped pattern and aborts via std.fatal unless it agrees with expected */
+const subst = {regex, pat, text, sub, expected
+	var re
+
+	re = std.try(regex)
+	match regex.sub(re, text, sub)
+	| `std.Some res:
+		match expected
+		| `std.Some e:
+			if !std.sleq(res, e)
+				std.fatal("bad subst: expected {}, got {}\n", e, res)
+			;;
+		| `std.None:
+			std.fatal("expected no subst, got {}\n", res)
+		;;
+	| `std.None:
+		match expected
+		| `std.Some e:
+			std.fatal("got no sub, expected {}\n", e)
+		| `std.None:
+		;;
+	;;
 }
 
 const run = {regex, pat, text, expected, search