ref: 1ce73fddb72b47018f8b0e4b94a43a0a4737d366
parent: 825a9ce46499cca503d963edb7bf4a3b7b61c264
author: Ori Bernstein <[email protected]>
date: Tue Dec 17 11:39:19 EST 2013
Add comments.
--- a/compile.myr
+++ b/compile.myr
@@ -34,6 +34,12 @@
`Fail status
;;
+/* Compiles a pattern into a regex */
+const compile = {pat
+ -> regexcompile(std.zalloc(), pat)
+}
+
+/* Compiles a pattern into a debug regex. This can be verbose. */
const dbgcompile = {pat
var re
@@ -42,10 +48,7 @@
-> regexcompile(re, pat)
}
-const compile = {pat
- -> regexcompile(std.zalloc(), pat)
-}
-
+/* compiles a pattern into an allocated regex */
const regexcompile = {re, pat
re.pat = pat
re.nmatch = 1 /* whole match */
@@ -64,6 +67,7 @@
-> `std.Failure (`Noimpl)
}
+/* generates bytecode from an AST */
const gen = {re, t
var m
@@ -92,7 +96,15 @@
-> re.proglen
}
-const genrange = {re, lo, hi
+/*
converts a codepoint range spanning multiple utf8 byte lengths into a
+ set of utf8 ranges. Eg:
[0x00-0x7FF] => [0x00-0x7F]|[0xC2-0xDF][0x80-0xBF]
+*/
+const genrange = {re, lo, hi
/* the transitions between different char lengths for unicode
+ characters, needed so that we know how to generate the
+ different size categories */
var charrng = [
0,
0x80,
@@ -138,6 +150,7 @@
-> re.proglen
}
+/* calculates the forward jump distance for a utf8 character range */
const jmpdist = {n
var d
var i
@@ -149,6 +162,7 @@
-> d
}
+/* generates an alternation */
const genalt = {re, l, r
var alt
var jmp
@@ -168,6 +182,7 @@
-> re.proglen
}
+/* generates a repetition operator */
const genstar = {re, rep
var alt
var jmp
@@ -187,6 +202,7 @@
-> re.proglen
}
+/* generates a question mark operator */
const genquest = {re, q
var alt
var l0
@@ -199,6 +215,7 @@
-> re.proglen
}
+/* generates a single char match */
const genchar = {re, c
var b : byte[4]
var n
@@ -211,6 +228,7 @@
-> re.proglen
}
+/* appends an instruction to an re program */
const append = {re, insn
if re.proglen == re.prog.len
re.prog = std.slgrow(re.prog, std.max(1, 2*re.proglen))
@@ -220,6 +238,7 @@
-> re.proglen
}
+/* instruction dump */
const idump = {re
var i
@@ -246,6 +265,7 @@
;;
}
+/* AST dump */
const dump = {re, t, indent
var i
@@ -293,6 +313,7 @@
;;
}
+/* parses an expression */
const parse = {re
match altexpr(re)
| `Some t: