shithub: mc

Download patch

ref: 3dde0d66b484358b413dde842959e8a832a6a991
parent: e25ab2a93db2cff8ee634e0f82244a66137279ba
author: Ori Bernstein <[email protected]>
date: Wed Apr 29 17:23:39 EDT 2015

Add regex parsing to libregex.

    I'd like to use the same regex parser for source indexing
    and parser generation.

--- a/libregex/compile.myr
+++ b/libregex/compile.myr
@@ -4,38 +4,14 @@
 use "ranges.use"
 
 pkg regex =
+	const parse	: (re : byte[:]	-> std.result(ast#, status))
 	const compile	: (re : byte[:] -> std.result(regex#, status))
 	const dbgcompile	: (re : byte[:] -> std.result(regex#, status))
 	const free	: (re : regex# -> void)
 ;;
 
-type tree = union
-	/* basic string building */
-	`Alt	(tree#, tree#)
-	`Cat	(tree#, tree#)
-
-	/* repetition */
-	`Star	tree#
-        `Rstar  tree#
-	`Plus	tree#
-	`Rplus	tree#
-	`Quest	tree#	
-
-	/* end matches */
-	`Byte	byte
-	`Chr	char
-	`Ranges	char[2][:]
-
-	/* meta */
-	`Cap	(std.size, tree#) /* id, tree */
-	`Bol	/* beginning of line */
-	`Eol	/* end of line */
-	`Bow	/* beginning of word */
-	`Eow	/* end of word */
-;;
-
 type parseresult = union
-	`Some tree#
+	`Some ast#
 	`None
 	`Fail status
 ;;
@@ -42,23 +18,36 @@
 
 /* Compiles a pattern into a regex */
 const compile = {pat
-	-> regexcompile(std.zalloc(), pat)
+	-> regexcompile(std.mk([.pat = pat, .nmatch = 1]))
 }
 
+/* Parses a pattern into an ast without compiling it. Fails with `Incomplete if input is left over. */
+const parse = {pat
+	var re = std.mk([.pat = pat, .nmatch = 1])	/* NOTE(review): re is never freed here; confirm ownership */
+
+	match regexparse(re)
+	| `None:	-> `std.Fail `Incomplete
+	| `Fail f:	-> `std.Fail f
+	| `Some t:
+		if re.pat.len > 0
+			astfree(t)	/* don't leak the partial tree; regexcompile frees it the same way */
+			-> `std.Fail `Incomplete
+		;;
+		-> `std.Ok t
+	;;
+}
+
 /* Compiles a pattern into a debug regex. This can be verbose. */
 const dbgcompile = {pat
 	var re
 
-	re = std.zalloc()
-	re.debug = true
-	-> regexcompile(re, pat)
+	re = std.mk([.pat = pat, .nmatch = 1, .debug = true])
+	-> regexcompile(re)
 }
 
 /* compiles a pattern into an allocated regex */
-const regexcompile = {re, pat
-	re.pat = pat
-	re.nmatch = 1 /* whole match */
-	match parse(re)
+const regexcompile = {re
+	match regexparse(re)
 	| `None:	-> `std.Fail (`Incomplete)
 	| `Fail f:	-> `std.Fail f
 	| `Some t:
@@ -67,6 +56,7 @@
 		an incorrectly encoded char
 		*/
 		if re.pat.len > 0
+			astfree(t)
 			-> `std.Fail (`Incomplete)
 		;;
 		dump(re, t, 0)
@@ -473,7 +463,7 @@
 }
 
 /* parses an expression */
-const parse = {re
+const regexparse = {re
 	match altexpr(re)
 	| `Some t:
 		if re.pat.len == 0
@@ -490,7 +480,7 @@
 }
 
 const altexpr = {re
-	var ret : tree#
+	var ret
 
 	match catexpr(re)
 	| `Some t:
--- a/libregex/types.myr
+++ b/libregex/types.myr
@@ -11,6 +11,32 @@
 		`Badescape
 	;;
 
+	type ast = union	/* syntax tree returned by regex.parse */
+		/* binary nodes: combine two subtrees */
+		`Alt	(ast#, ast#)
+		`Cat	(ast#, ast#)
+
+		/* repetition of a single subtree */
+		`Star	ast#
+		`Rstar  ast#
+		`Plus	ast#
+		`Rplus	ast#
+		`Quest	ast#	
+
+		/* leaf matches: carry a value, no subtrees */
+		`Byte	byte
+		`Chr	char
+		`Ranges	char[2][:]
+
+		/* meta: capture group plus line/word boundary markers */
+		`Cap	(std.size, ast#) /* id, ast */
+		`Bol	/* beginning of line */
+		`Eol	/* end of line */
+		`Bow	/* beginning of word */
+		`Eow	/* end of word */
+	;;
+
+
 	type regex = struct
 		/* compile state */
 		debug	: bool
--- a/parse/gram.y
+++ b/parse/gram.y
@@ -467,10 +467,7 @@
 compoundtype
         : functype   {$$ = $1;}
         | type Tosqbrac Tcolon Tcsqbrac {$$ = mktyslice($2->loc, $1);}
-        | type Tosqbrac expr Tcsqbrac {
-                $3->expr.type = mktype($3->loc, Tyuint32);
-                $$ = mktyarray($2->loc, $1, $3);
-            }
+        | type Tosqbrac expr Tcsqbrac {$$ = mktyarray($2->loc, $1, $3);}
         | type Tosqbrac Tellipsis Tcsqbrac {$$ = mktyarray($2->loc, $1, NULL);}
         | type Tderef {$$ = mktyptr($2->loc, $1);}
         | Tat Tident {$$ = mktyparam($1->loc, $2->id);}
--- a/parse/infer.c
+++ b/parse/infer.c
@@ -1712,8 +1712,10 @@
             st->intype--;
         } else if (t->type == Tyunion) {
             for (i = 0; i < t->nmemb; i++) {
-                if (t->udecls[i]->etype)
+                if (t->udecls[i]->etype) {
+                    tyresolve(st, t->udecls[i]->etype);
                     t->udecls[i]->etype = tyfix(st, ctx, t->udecls[i]->etype, noerr);
+                }
             }
         } else if (t->type == Tyname) {
             for (i = 0; i < t->narg; i++)