shithub: mc

ref: 25bfea9b4ab857720f4d184fb2f7e1c42c3fdae5
dir: /interp.myr/

View raw version
use std

use "types.use"

pkg regex =
	const exec	: (re : regex#, str : byte[:] -> bool)
	const find	: (re : regex#, str : byte[:] -> bool)
;;


const exec = {re, str
	var i

	re.debug = true
	re.str = str
	re.strp = 0
	run(re)
	match re.matched
	`std.None:
		-> false
		;;
	`std.Some thr:	
		for i = 0; i < re.nmatch; i++
			std.put("match %z:", i)
			std.put("[%z..%z]\n", thr.mstart[i], thr.mend[i])
		;;
		-> thr.mend[0] == str.len
		;;
	;;
}

const run = {re
	var i

	re.matched = `std.None
	mkthread(re, 0)
	re.thr[0].mstart = std.slalloc(re.nmatch)
	re.thr[0].mend = std.slalloc(re.nmatch)
	for i = 0; i < re.nmatch; i++
		re.thr[0].mstart[i] = -1
		re.thr[0].mend[i] = -1
	;;
	while re.nthr > 0
		for i = 0; i < re.nthr; i++
			trace(re, "tid=%z, ip=%z, c=b\n", re.thr[i].uid, re.thr[i].ip, re.str[re.strp])
			step(re, i)
		;;
		if re.nthr > 0
			re.strp++
		;;
	;;
}

const step = {re, tid
	var thr
	var str
	var mstart
	var mend

	thr = re.thr[tid]
	str = re.str
	match re.prog[thr.ip]
	/* Char matching. Consume exactly one byte from the string. */
	`Ibyte b:
		trace(re, "\t%z:\tByte %b\n", thr.ip, b)
		if !in(re, str)
			kill(re, tid, "end of string")
		elif b != str[re.strp]
			kill(re, tid, "not right char")
		else
			thr.ip++
			trace(re, "\t\tmatched %b with %b\n", b, str[re.strp])
		;;
		;;
	`Irange (start, end):
		trace(re, "\t%z:\tRange (%b, %b)\t", thr.ip, start, end)
		if !in(re, str) || start > str[re.strp] || end < str[re.strp]
			kill(re, tid, "bad range")
		else
			thr.ip++
		;;
		;;
	`Idot:
		trace(re, "\t%z:\tDot\n", thr.ip)
		if in(re, str)
			kill(re, tid, "past end")
		else
			thr.ip++
		;;
		;;
	/*
	  Non-consuming. All of these recursively call step() until
	  exactly one byte is consumed from the string.
	 */
	`Ibol:
		trace(re, "\t%z:\tBol\n", thr.ip)
		if re.strp == 0 || str[re.strp -1] == 0x10
			thr.ip++
			step(re, tid)
		else
			kill(re, tid, "not beginning of line")
		;;
		;;
	`Ieol:
		trace(re, "\t%z:\tEol\n", thr.ip)
		if re.strp == str.len || str[re.strp] == 0x10
			step(re, tid)
		else
			kill(re, tid, "not end of line")
		;;
		;;
	`Ilbra	m:
		trace(re, "\t%z:\tLbra %z\n", thr.ip, m)
		trace(re, "\t\tmatch start = %z\n", re.strp)
		thr.mstart[m] = re.strp
		thr.ip++
		step(re, tid)
		;;
	`Irbra	m:
		trace(re, "\t%z:\tRbra %z\n", thr.ip, m)
		thr.mend[m] = re.strp
		thr.ip++
		step(re, tid)
		;;
	`Ifork	(lip, rip):
		trace(re, "\t%z:\tFork (%z, %z)\n", thr.ip, rip, lip)
		mstart = std.sldup(thr.mstart)
		mend = std.sldup(thr.mend)
		jmp(re, tid, rip)
		fork(re, thr, lip, mstart, mend)
		;;
	`Ijmp ip:
		trace(re, "\t%z:\tJmp %z\n", thr.ip, ip)
		jmp(re, tid, ip)
		;;
	`Imatch:
		trace(re, "\t%z:\tMatch\n", thr.ip)
		finish(re, tid)
		;;
	;;
}

const jmp = {re, tid, ip
	re.thr[tid].ip = ip
	step(re, tid)
}

var uid = 0

const mkthread = {re, ip
	var thr : rethread#
	var tid

	tid = re.nthr
	if re.nthr >= re.thr.len
		re.thr = std.slgrow(re.thr, std.max(1, re.nthr * 2))
	;;

	thr = std.alloc()
	thr.ip = ip
	thr.uid = uid++

	re.thr[re.nthr] = thr
	re.nthr++
	-> tid
}

const fork = {re, thr, ip, mstart, mend
	var tid

	tid = mkthread(re, ip)
	re.thr[tid].mstart = mstart
	re.thr[tid].mend = mend
	step(re, tid)
}

const kill = {re, tid, msg
	/* 
	   free the dying thread, and shuffle the last
	   thread into the it's place in the thread list
	*/
	trace(re, "\t\tkill %z: %s\n", re.thr[tid].uid, msg)
	std.slfree(re.thr[tid].mstart)
	std.slfree(re.thr[tid].mend)
	std.free(re.thr[tid])
	re.thr[tid] = re.thr[re.nthr - 1]
	re.nthr--
}

const finish = {re, tid
	trace(re, "finish\n", tid)
	match re.matched
	`std.Some thr:	std.free(thr);;
	`std.None:	;;
	;;
	re.matched = `std.Some re.thr[tid]
	re.thr[tid] = re.thr[re.nthr - 1]
	re.nthr--
}

const in = {re, str 
	-> re.strp < str.len
}

const trace : (re : regex#, msg : byte[:], args : ...) = {re, msg, args
	if re.debug
		std.putv(msg, std.vastart(&args))
	;;
}