shithub: lpa

ref: 2b23d05d57743af57385cd42c0fd2d223b11d8c8
dir: /parse.c/

View raw version
#include <u.h>
#include <libc.h>
#include <thread.h>

#include "dat.h"
#include "fns.h"

static int peek(TokenList *);
static int peekclass(TokenList *);
static void match(TokenList *, int);
static void addchild(Ast *, Ast *);
static int issep(TokenList *t);
static int isexprsep(TokenList *t);
static int nameclass(char *, Symtab *, Ast *);

static void parsesep(TokenList *);
static void parseseps(TokenList *, int);
static Ast *parseprog(TokenList *);
static Ast *parsefuncdef(TokenList *);
static Ast *parsefuncheader(TokenList *);
static Ast *parsefuncleftopt(TokenList *);
static Ast *parselocals(TokenList *);
static Ast *parseexpr(TokenList *, Symtab *, Ast *);
static Ast *parseexprsub(TokenList *);
static Ast *parseline(TokenList *);
static Ast *parsename(TokenList *);
static Ast *parsefunc(TokenList *);
static Ast *parseconst(TokenList *);

Ast *
parse(TokenList *tokens, Symtab *symtab)
{
	Ast *ast;

	tokens->offset = 0;
	if(symtab)
		ast = parseexpr(tokens, symtab, nil);
	else
		ast = parseprog(tokens);
	match(tokens, TokEnd);

	return ast;
}

static int
peek(TokenList *tokens)
{
	if(tokens->offset >= tokens->count)
		error(ESyntax, "unexpected end of token stream");

	return tokens->tokens[tokens->offset].tag;
}

static int
peekclass(TokenList *tokens)
{
	peek(tokens); /* triggers an error if we are at the end */
	return tokens->tokens[tokens->offset].nameclass;
}

static void
match(TokenList *tokens, int tag)
{
	if(peek(tokens) != tag)
		error(ESyntax, "Unexpected token: %s", printtok(tokens->tokens[tokens->offset]));
	tokens->offset++;
}

static void
addchild(Ast *ast, Ast *child)
{
	ast->childcount++;
	ast->children = allocextra(ast, sizeof(Ast *) * ast->childcount);
	ast->children[ast->childcount-1] = child;
}

static int
issep(TokenList *t)
{
	switch(peek(t)){
	case TokNewline:
	case TokDiamond:
		return 1;
	default:
		return 0;
	}
}

static int
isexprsep(TokenList *t)
{
	switch(peek(t)){
	case TokNewline:
	case TokDiamond:
	case TokDel:
	case TokEnd:
		return 1;
	default:
		return 0;
	}
}

static int
nameclass(char *name, Symtab *s, Ast *func)
{
	int class = NameclassUndef;
	
	if(func == nil)
		class = NameclassUndef;
	else if(strcmp(name, func->funcname->name) == 0)
		class = NameclassFunc;
	else if(func->funcresult && strcmp(name, func->funcresult->name) == 0)
		class = NameclassLocal;
	else if(func->funcleftarg && strcmp(name, func->funcleftarg->name) == 0)
		class = NameclassLocal;
	else if(func->funcrightarg && strcmp(name, func->funcrightarg->name) == 0)
		class = NameclassLocal;
	
	if(s && class == NameclassUndef){
		uvlong symid = sym(s, name);
		void *val = symval(s, symid);

		if(val) switch(getalloctag(val)){
		case DataArray:
			class = NameclassArray;
			break;
		case DataFunction:
			class = NameclassFunc;
			break;
		/* more cases here in the future */
		}
	}

	/* TODO: Check if the name exist in the locallist */

	return class;
}

static void
parsesep(TokenList *t)
{
	if(issep(t))
		match(t, peek(t));
}

static void
parseseps(TokenList *t, int required)
{
	while(issep(t)){
		match(t, peek(t));
		required = 0;
	}
	if(required)
		match(t, TokNewline);
}

static Ast *
parseprog(TokenList *t)
{
	Ast *prog = alloc(DataAst);
	prog->tag = AstProg;

	while(peek(t) != TokEnd){
		Ast *child = nil;
		switch(peek(t)){
		case TokDiamond:
		case TokNewline:
			break;
		case TokDel:
			child = parsefuncdef(t);
			break;
		default:
			child = parseexpr(t, nil, nil);
			break;
		}
		if(peek(t) != TokEnd)
			parseseps(t, 1);
		if(child)
			addchild(prog, child);
	}
	return prog;
}

static Ast *
parsefuncdef(TokenList *t)
{
	Ast *func = parsefuncheader(t);
	while(peek(t) != TokDel){
		Ast *expr = parseexpr(t, nil, func);
		addchild(func, expr);
		if(peek(t) != TokDel)
			parseseps(t, 1);
	}
	match(t, TokDel);

	return func;
}

static Ast *
parsefuncheader(TokenList *t)
{
	Ast *func = alloc(DataAst);
	func->tag = AstFunc;

	match(t, TokDel);

	func->funcleftarg = parsefuncleftopt(t); /* optional left arg */

	func->funcname = parsename(t);
	if(peek(t) == TokLarrow && !func->funcleftarg){
		match(t, TokLarrow);
		func->funcresult = func->funcname;
		func->funcleftarg = parsefuncleftopt(t);
		func->funcname = parsename(t);
	}
	if(peek(t) == TokName)
		func->funcrightarg = parsename(t);
	if(peek(t) == TokName && !func->funcleftarg){
		func->funcleftarg = func->funcname;
		func->funcname = func->funcrightarg;
		func->funcrightarg = parsename(t);
	}
	func->funclocals = parselocals(t);

	return func;
}

static Ast *
parsefuncleftopt(TokenList *t)
{
	Ast *name = nil;
	if(peek(t) == TokLbrace){
		match(t, TokLbrace);
		name = parsename(t);
		name->optional = 1;
		match(t, TokRbrace);
	}
	return name;
}

static Ast *
parselocals(TokenList *t)
{
	Ast *locals = alloc(DataAst);
	locals->tag = AstLocals;
	while(peek(t) == TokSemi){
		match(t, TokSemi);
		Ast *name = parsename(t);
		name->nameclass = NameclassLocal;
		addchild(locals, name);
	}
	parseseps(t, 1);
	return locals;
}

static Ast *
parseexpr(TokenList *t, Symtab *symtab, Ast *func)
{
	uvlong start, end;
	vlong depth;

	depth = 0;
	start = t->offset;
	while(!isexprsep(t) || depth != 0){
		switch(peek(t)){
		case TokLparen: depth++; break;
		case TokRparen: depth--; break;
		}
		match(t, peek(t));
	}
	end = t->offset;
	t->offset = start;

	for(uvlong i = start; i < end; i++){
		char *name;
		int class;

		if(t->tokens[i].tag != TokName)
			continue;
		if((i+1) < end && t->tokens[i+1].tag == TokLarrow)
			continue; /* assignment */

		name = t->tokens[i].name;
		class = nameclass(name, symtab, func);
		t->tokens[i].nameclass = class;
		if(class == 0){ /* We don't know how to parse it until runtime */
			if(symtab)
				error(EValue, "%s is undefined", name);

			uvlong count = end-start;
			Ast *later = alloc(DataAst);
			later->tag = AstLater;
			later->tokens = alloc(DataTokenList);
			later->tokens->count = count+1;
			later->tokens->tokens = allocextra(later->tokens, sizeof(Token) * later->tokens->count);
			for(i = 0; i < count; i++){
				later->tokens->tokens[i] = t->tokens[start+i];
				match(t, peek(t));
			}
			later->tokens->tokens[count].tag = TokEnd;
			return later;
		}
	}

	/* We know the nameclass of each name, and assume that the nameclasses do not change.
	 * Now create the AST.
	 */
	return parseexprsub(t);
}

static Ast *
parseexprsub(TokenList *t)
{
	Ast *expr, *val, *strand;
	
	strand = nil;
again:
	if(peek(t) == TokLparen){
		match(t, TokLparen);
		val = parseexprsub(t);
		match(t, TokRparen);
	}else
		val = nil;

	if(peekclass(t) == NameclassFunc){
func:
		expr = alloc(DataAst);
		expr->func = parsefunc(t);
		if(val == nil && (isexprsep(t) || peek(t) == TokRparen))
			expr->tag = AstNiladic;
		else{
			if(val){
				expr->tag = AstDyadic;
				expr->left = val;
			}else
				expr->tag = AstMonadic;
			expr->right = parseexprsub(t);
		}
		val = expr;
		goto end;
	}

	if(peek(t) == TokName){
		val = parsename(t);
		if(peek(t) == TokLarrow){
			match(t, TokLarrow);
			expr = alloc(DataAst);
			expr->tag = AstAssign;
			expr->left = val;
			expr->right = parseexprsub(t);
			val = expr;
			goto end;
		}
	}

	/* We need a value now */
	if(val == nil)
		val = parseconst(t);

	if(peekclass(t) == NameclassFunc){
		if(strand){
			addchild(strand, val);
			val = strand;
			strand = nil;
		}
		goto func;
	}

	if(!(isexprsep(t) || peek(t) == TokRparen)){ /* Stranding */
		if(!strand){
			strand = alloc(DataAst);
			strand->tag = AstStrand;
		}
		addchild(strand, val);
		goto again;
	}

end:
	if(strand){
		addchild(strand, val);
		val = strand;
	}

	return val;
}

static Ast *
parsename(TokenList *t)
{
	Ast *name = alloc(DataAst);
	name->tag = AstName;
	name->name = t->tokens[t->offset].name;
	match(t, TokName);
	return name;
}

static Ast *
parsefunc(TokenList *t)
{
	Ast *func;
	if(peek(t) == TokName && peekclass(t) == NameclassFunc)
		func = parsename(t);
	else{
		func = alloc(DataAst);
		func->tag = AstPrim;
		func->prim = t->tokens[t->offset].prim;
		match(t, TokPrimitive);
	}

	return func;
}

static Ast *
parseconst(TokenList *t)
{
	Ast *val = alloc(DataAst);
	val->tag = AstConst;
	vlong num, len;
	Rune *str;

	switch(peek(t)){
	case TokNumber:
		num = t->tokens[t->offset].num;
		val->val = allocarray(TypeNumber, 0, 1);
		setint(val->val, 0, num);
		break;
	case TokString:
		str = t->tokens[t->offset].string;
		len = runestrlen(str);
		if(len == 1)
			val->val = allocarray(TypeChar, 0, len);
		else{
			val->val = allocarray(TypeChar, 1, len);
			setshape(val->val, 0, len);
		}
		for(uvlong i = 0; i < len; i++)
			setchar(val->val, i, str[i]);
		break;
	default:
		match(t, TokNumber); /* TODO: syntax error could be better here */
	}
	match(t, peek(t));
	return val;
}