ref: a7ba738bca21b54d6cf2bd5cdfe39b00f3d4b350
parent: 2a91805ad7f1a901eae822c58f40e74558bf8ec0
parent: f507c7ee81391e004d492d1095ca4263d3ee1830
author: Ori Bernstein <[email protected]>
date: Tue Oct 22 11:12:37 EDT 2013
Merge branch 'master' of git+ssh://git.eigenstate.org/git/ori/libregex Conflicts: compile.myr interp.myr main.myr types.myr
--- a/compile.myr
+++ b/compile.myr
@@ -40,11 +40,147 @@
re = std.zalloc()
re.pat = pat
match parse(re)
- `Some t: dump(t, 0);;
`None: std.put("Empty parse\n");;
`Fail f: -> `std.Failure f;;
+ `Some t:
+ dump(t, 0)
+ gen(re, t)
+ append(re, `Imatch)
+ idump(re)
+ -> `std.Success re
+ ;;
;;
-> `std.Failure (`Noimpl)
+}
+
+const gen = {re, t
+ match t#
+ `Alt (a, b): genalt(re, a, b);;
+ `Cat (a, b): gen(re, a); gen(re, b);;
+ /* repetition */
+ `Star a: genstar(re, a);;
+ `Plus a: gen(re, a); genstar(re, a);;
+ `Quest a: genquest(re, a);;
+
+ /* end matches */
+ `Class sl:
+ std.die("Can't gen class\n")
+ ;;
+ `Byte b: append(re, `Ibyte b);;
+ `Chr c: genchar(re, c);;
+ `Dot: append(re, `Idot);;
+
+ /* meta */
+ `Cap a:
+ std.put("WARNING: Capture not implemented")
+ gen(re, a)
+ ;;
+ ;;
+ -> re.proglen
+}
+
+const genalt = {re, l, r
+ var alt
+ var jmp
+ var l0
+ var l1
+ var l2
+
+ alt = re.proglen
+ l0 = append(re, `Ifork (-1, -1)) /* needs to be replaced */
+ gen(re, l)
+ jmp = re.proglen
+ l1 = append(re, `Ijmp -1) /* needs to be replaced */
+ l2 = gen(re, l)
+
+ re.prog[alt] = `Ifork(l0, l1)
+ re.prog[jmp] = `Ijmp l2
+ -> re.proglen
+}
+
+const genstar = {re, rep
+ var alt
+ var jmp
+ var l0
+ var l1
+ var l2
+
+ l0 = re.proglen
+ alt = re.proglen
+ l1 = append(re, `Ifork (-1, -1)) /* needs to be replaced */
+ jmp = gen(re, rep)
+ l2 = append(re, `Ijmp -1)
+
+
+ re.prog[alt] = `Ifork (l1, l2)
+ re.prog[jmp] = `Ijmp l0
+ -> re.proglen
+}
+
+const genquest = {re, q
+ var alt
+ var l0
+ var l1
+
+ alt = re.proglen
+ l0 = append(re, `Ifork (-1, -1)) /* needs to be replaced */
+ l1 = gen(re, q)
+ re.prog[alt] = `Ifork (l0, l1)
+ -> re.proglen
+}
+
+const genchar = {re, c
+ var b : byte[4]
+ var n
+ var i
+
+ n = std.encode(b[:], c)
+ for i = 0; i < n; i++
+ append(re, `Ibyte b[i])
+ ;;
+ -> re.proglen
+}
+
+const append = {re, insn
+ if re.proglen == re.prog.len
+ re.prog = std.slgrow(re.prog, std.max(1, 2*re.proglen))
+ ;;
+ re.prog[re.proglen] = insn
+ re.proglen++
+ -> re.proglen
+}
+
+const idump = {re
+ var i
+
+ for i = 0; i < re.proglen; i++
+ std.put("%i:\t", i)
+ match re.prog[i]
+ /* Char matching. Consume exactly one byte from the string. */
+ `Ibyte b:
+ std.put("`Ibyte %b (%c)\n", b, b castto(char))
+ ;;
+ `Irange (start, end):
+ std.put("`Irange (%b,%b)\n", start, end)
+ ;;
+ `Idot:
+ std.put("`Idot\n")
+ ;;
+ /*
+ Control flow. All of these recursively call step() until
+ exactly one byte is consumed from the string.
+ */
+ `Ifork (lip, rip):
+ std.put("`Ifork (%z,%z)\n", lip, rip)
+ ;;
+ `Ijmp ip:
+ std.put("`Ijmp %z\n", ip)
+ ;;
+ `Imatch:
+ std.put("`Imatch\n")
+ ;;
+ ;;
+ ;;
}
const dump = {t, indent
--- a/interp.myr
+++ b/interp.myr
@@ -12,13 +12,16 @@
re.str = str
re.strp = 0
re.matched = `std.None
- spawn(re, 0)
+ mkthread(re, 0)
+ std.put("tid %z, ip %z, strp %z\n", re.thr[0].uid, re.thr[0].ip, re.strp)
while re.nthr > 0
for i = 0; i < re.nthr; i++
+ std.put("Switch to %i\n", i)
std.put("tid %z, ip %z, strp %z\n", re.thr[i].uid, re.thr[i].ip, re.strp)
step(re, i)
;;
if re.nthr > 0
+ std.put("Step forward\n")
re.strp++
;;
;;
@@ -36,19 +39,23 @@
str = re.str
match re.prog[thr.ip]
/* Char matching. Consume exactly one byte from the string. */
- `Byte b:
- if !in(re, str) || b != str[re.strp]
+ `Ibyte b:
+ if !in(re, str)
+ kill(re, tid, "end of string")
+ elif b != str[re.strp]
+ std.put("re.strp: %i\n", re.strp)
+ std.put("\tb: %b (%c), str[re.strp]: %b (%c)\n", b, b castto(char), str[re.strp], str[re.strp] castto(char))
kill(re, tid, "not right char")
else
std.put("matched %b with %b\n", b, str[re.strp])
;;
;;
- `Range (start, end):
+ `Irange (start, end):
if !in(re, str) || start > str[re.strp] || end < str[re.strp]
kill(re, tid, "bad range")
;;
;;
- `Dot:
+ `Idot:
if !in(re, str)
kill(re, tid, "past end")
;;
@@ -57,14 +64,14 @@
Control flow. All of these recursively call step() until
exactly one byte is consumed from the string.
*/
- `Split (lip, rip):
+ `Ifork (lip, rip):
spawn(re, lip)
jmp(re, tid, rip)
;;
- `Jmp ip:
+ `Ijmp ip:
jmp(re, tid, ip)
;;
- `Match:
+ `Imatch:
finish(re, tid)
;;
;;
@@ -78,7 +85,8 @@
}
var uid = 0
-const spawn = {re, ip
+
+const mkthread = {re, ip
var thr : rethread#
var tid
@@ -94,7 +102,11 @@
re.thr[re.nthr] = thr
re.nthr++
- step(re, tid)
+ -> tid
+}
+
+const spawn = {re, ip
+ step(re, mkthread(re, ip))
}
const kill = {re, tid, msg
--- a/main.myr
+++ b/main.myr
@@ -2,12 +2,17 @@
use std
const main = {
- regex.compile("a+")
- /*
- var re
var found
- found = regex.exec(re, "aaa")
- std.put("Found: len = %z\n", re.strp, found)
- */
+ match regex.compile("foolish")
+ `std.Success re:
+ found = regex.exec(re, "foolish")
+ std.put("Found = %t: len = %z\n", found, re.strp)
+ -> 0
+ ;;
+ `std.Failure err:
+ std.put("failed to compile regex")
+ -> 1
+ ;;
+ ;;
}
--- a/types.myr
+++ b/types.myr
@@ -12,6 +12,7 @@
type regex = struct
pat : byte[:]
/* VM state */
+ proglen : std.size
prog : reinst[:]
nthr : std.size
thr : rethread#[:]
@@ -27,13 +28,13 @@
type reinst = union
/* direct consumers */
- `Byte byte
- `Range [byte, byte]
- `Dot
+ `Ibyte byte
+ `Irange [byte, byte]
+ `Idot
- `Match /* found the end of the expr */
+ `Imatch /* found the end of the expr */
- `Split [std.size, std.size]
- `Jmp std.size
+ `Ifork [std.size, std.size]
+ `Ijmp std.size
;;
;;