shithub: purgatorio

ref: a5cb451b299b03f44154fac5780b6a57ca130ce0
dir: /appl/cmd/mash/eyacc.b/

View raw version
implement Yacc;

include "sys.m";
	sys: Sys;
	print, fprint, sprint: import sys;
	UTFmax: import Sys;

include "bufio.m";
	bufio: Bufio;
	Iobuf: import bufio;

include "draw.m";

Yacc: module
{
	init:	fn(ctxt: ref Draw->Context, argv: list of string);
};

Arg: adt
{
	argv:	list of string;
	c:	int;
	opts:	string;

	init:	fn(argv: list of string): ref Arg;
	opt:	fn(arg: self ref Arg): int;
	arg:	fn(arg: self ref Arg): string;
};

PARSER:		con "./eyaccpar";
OFILE:		con "tab.b";
FILEU:		con "output";
FILED:		con "tab.m";
FILEDEBUG:	con "debug";

# the following are adjustable
# according to memory size
ACTSIZE:	con 30000;
NSTATES:	con 2000;
TEMPSIZE:	con 2000;

SYMINC:		con 50;				# increase for non-term or term
RULEINC:	con 50;				# increase for max rule length	prodptr[i]
PRODINC:	con 100;			# increase for productions	prodptr
WSETINC:	con 50;				# increase for working sets	wsets
STATEINC:	con 200;			# increase for states		statemem

NAMESIZE:	con 50;
NTYPES:		con 63;
ISIZE:		con 400;

PRIVATE:	con 16rE000;			# unicode private use

# relationships which must hold:
#	TEMPSIZE >= NTERMS + NNONTERM + 1
#	TEMPSIZE >= NSTATES
#

NTBASE:		con 8r10000;
ERRCODE:	con 8190;
ACCEPTCODE:	con 8191;
YYLEXUNK:	con 3;
TOKSTART:	con 4;				#index of first defined token

# no, left, right, binary assoc.
NOASC, LASC, RASC, BASC: con iota;

# flags for state generation
DONE, MUSTDO, MUSTLOOKAHEAD: con iota;

# flags for a rule having an action, and being reduced
ACTFLAG:	con 16r4;
REDFLAG:	con 16r8;

# output parser flags
YYFLAG1:	con -1000;

# parse tokens
IDENTIFIER, MARK, TERM, LEFT, RIGHT, BINARY, PREC, LCURLY, IDENTCOLON, NUMBER, START, TYPEDEF, TYPENAME, MODULE: con PRIVATE+iota;

ENDFILE:	con 0;

EMPTY:		con 1;
WHOKNOWS:	con 0;
OK:		con 1;
NOMORE:		con -1000;

# macros for getting associativity and precedence levels
ASSOC(i: int): int
{
	return i & 3;
}

PLEVEL(i: int): int
{
	return (i >> 4) & 16r3f;
}

TYPE(i: int): int
{
	return (i >> 10) & 16r3f;
}

# macros for setting associativity and precedence levels
SETASC(i, j: int): int
{
	return i | j;
}

SETPLEV(i, j: int): int
{
	return i | (j << 4);
}

SETTYPE(i, j: int): int
{
	return i | (j << 10);
}

# I/O descriptors
stderr:		ref Sys->FD;
fdefine:	ref Iobuf;			# file for module definition
fdebug:		ref Iobuf;			# y.debug for strings for debugging
ftable:		ref Iobuf;			# y.tab.c file
finput:		ref Iobuf;			# input file
foutput:	ref Iobuf;			# y.output file

CodeData, CodeMod, CodeAct: con iota;
NCode:	con 8192;

Code: adt
{
	kind:	int;
	data:	array of byte;
	ndata:	int;
	next:	cyclic ref Code;
};

codehead:	ref Code;
codetail:	ref Code;

modname:	string;				# name of module

# communication variables between various I/O routines
infile:		string;				# input file name
numbval:	int;				# value of an input number
tokname:	string;				# input token name, slop for runes and 0

# structure declarations
Lkset: type array of int;

Pitem: adt
{
	prod:	array of int;
	off:	int;				# offset within the production
	first:	int;				# first term or non-term in item
	prodno:	int;				# production number for sorting
};

Item: adt
{
	pitem:	Pitem;
	look:	Lkset;
};

Symb: adt
{
	name:	string;
	value:	int;
};

Wset: adt
{
	pitem:	Pitem;
	flag:	int;
	ws:	Lkset;
};

	# storage of names

parser :=	PARSER;
yydebug:	string;

	# storage of types
ntypes:		int;				# number of types defined
typeset :=	array[NTYPES] of string;	# pointers to type tags

	# token information

ntokens :=	0;				# number of tokens
tokset:		array of Symb;
toklev:		array of int;			# vector with the precedence of the terminals

	# nonterminal information

nnonter :=	-1;				# the number of nonterminals
nontrst:	array of Symb;
start:		int;				# start symbol

	# state information

nstate := 	0;				# number of states
pstate :=	array[NSTATES+2] of int;	# index into statemem to the descriptions of the states
statemem :	array of Item;
tystate :=	array[NSTATES] of int;		# contains type information about the states
tstates :	array of int;			# states generated by terminal gotos
ntstates :	array of int; 			# states generated by nonterminal gotos
mstates :=	array[NSTATES] of {* => 0};	# chain of overflows of term/nonterm generation lists
lastred: 	int; 				# number of last reduction of a state
defact :=	array[NSTATES] of int;		# default actions of states

	# lookahead set information

lkst: array of Lkset;
nolook := 0;					# flag to turn off lookahead computations
tbitset := 0;					# size of lookahead sets
clset: Lkset;  					# temporary storage for lookahead computations

	# working set information

wsets:	array of Wset;
cwp:	int;

	# storage for action table

amem:	array of int;				# action table storage
memp:	int;					# next free action table position
indgo := array[NSTATES] of int;			# index to the stored goto table

	# temporary vector, indexable by states, terms, or ntokens

temp1 :=	array[TEMPSIZE] of int;		# temporary storage, indexed by terms + ntokens or states
lineno :=	1;				# current input line number
fatfl :=	1;  				# if on, error is fatal
nerrors :=	0;				# number of errors

	# assigned token type values
extval :=	0;

ytabc :=	OFILE;	# name of y.tab.c

	# grammar rule information

nprod := 1;					# number of productions
prdptr: array of array of int;			# pointers to descriptions of productions
levprd: array of int;				# precedence levels for the productions
rlines: array of int;				# line number for this rule


	# statistics collection variables

zzgoent := 0;
zzgobest := 0;
zzacent := 0;
zzexcp := 0;
zzclose := 0;
zzrrconf := 0;
zzsrconf := 0;
zzstate := 0;

	# optimizer arrays
yypgo:	array of array of int;
optst:	array of array of int;
ggreed:	array of int;
pgo:	array of int;

maxspr: int;  		# maximum spread of any entry
maxoff: int;  		# maximum offset into a array
maxa:	int;

	# storage for information about the nonterminals

pres: array of array of array of int;		# vector of pointers to productions yielding each nonterminal
pfirst: array of Lkset;
pempty:	array of int;				# vector of nonterminals nontrivially deriving e
	# random stuff picked out from between functions

indebug	:= 0;		# debugging flag for cpfir
pidebug	:= 0;		# debugging flag for putitem
gsdebug	:= 0;		# debugging flag for stagen
cldebug	:= 0;		# debugging flag for closure
pkdebug	:= 0;		# debugging flag for apack
g2debug	:= 0;		# debugging for go2gen
adb	:= 0;		# debugging for callopt

Resrv : adt
{
	name:	string;
	value:	int;
};

resrv := array[] of {
	Resrv("binary",		BINARY),
	Resrv("module",		MODULE),
	Resrv("left",		LEFT),
	Resrv("nonassoc",	BINARY),
	Resrv("prec",		PREC),
	Resrv("right",		RIGHT),
	Resrv("start",		START),
	Resrv("term",		TERM),
	Resrv("token",		TERM),
	Resrv("type",		TYPEDEF),};

zznewstate := 0;

init(nil: ref Draw->Context, argv: list of string)
{
	sys = load Sys Sys->PATH;
	bufio = load Bufio Bufio->PATH;

	stderr = sys->fildes(2);

	setup(argv);		# initialize and read productions

	tbitset = (ntokens+32)/32;
	cpres();		# make table of which productions yield a given nonterminal
	cempty();		# make a table of which nonterminals can match the empty string
	cpfir();		# make a table of firsts of nonterminals

	stagen();		# generate the states

	yypgo = array[nnonter+1] of array of int;
	optst = array[nstate] of array of int;
	output();		# write the states and the tables
	go2out();

	hideprod();
	summary();

	callopt();

	others();

	bufio->flush();
}

setup(argv: list of string)
{
	j, ty: int;

	ytab := 0;
	vflag := 0;
	dflag := 0;
	stem := 0;
	stemc := "y";
	foutput = nil;
	fdefine = nil;
	fdebug = nil;
	arg := Arg.init(argv);
	while(c := arg.opt()){
		case c{
		'v' or 'V' =>
			vflag++;
		'D' =>
			yydebug = arg.arg();
		'd' =>
			dflag++;
		'o' =>
			ytab++;
			ytabc = arg.arg();
		's' =>
			stem++;
			stemc = arg.arg();
		* =>
			usage();
		}
	}
	argv = arg.argv;
	if(len argv != 1)
		usage();
	infile = hd argv;
	finput = bufio->open(infile, Bufio->OREAD);
	if(finput == nil)
		error("cannot open '"+infile+"'");

	openup(stemc, dflag, vflag, ytab, ytabc);

	defin(0, "$end");
	extval = PRIVATE;	# tokens start in unicode 'private use'
	defin(0, "error");
	defin(1, "$accept");
	defin(0, "$unk");
	i := 0;

	for(t := gettok(); t != MARK && t != ENDFILE; )
	case t {
	';' =>
		t = gettok();

	START =>
		if(gettok() != IDENTIFIER)
			error("bad %%start construction");
		start = chfind(1, tokname);
		t = gettok();

	TYPEDEF =>
		if(gettok() != TYPENAME)
			error("bad syntax in %%type");
		ty = numbval;
		for(;;) {
			t = gettok();
			case t {
			IDENTIFIER =>
				if((t=chfind(1, tokname)) < NTBASE) {
					j = TYPE(toklev[t]);
					if(j != 0 && j != ty)
						error("type redeclaration of token "+
							tokset[t].name);
					else
						toklev[t] = SETTYPE(toklev[t], ty);
				} else {
					j = nontrst[t-NTBASE].value;
					if(j != 0 && j != ty)
						error("type redeclaration of nonterminal "+
							nontrst[t-NTBASE].name);
					else
						nontrst[t-NTBASE].value = ty;
				}
				continue;
			',' =>
				continue;
			';' =>
				t = gettok();
			}
			break;
		}

	MODULE =>
		cpymodule();
		t = gettok();

	LEFT or BINARY or RIGHT or TERM =>
		# nonzero means new prec. and assoc.
		lev := t-TERM;
		if(lev)
			i++;
		ty = 0;

		# get identifiers so defined
		t = gettok();

		# there is a type defined
		if(t == TYPENAME) {
			ty = numbval;
			t = gettok();
		}
		for(;;) {
			case t {
			',' =>
				t = gettok();
				continue;

			';' =>
				break;

			IDENTIFIER =>
				j = chfind(0, tokname);
				if(j >= NTBASE)
					error(tokname+" defined earlier as nonterminal");
				if(lev) {
					if(ASSOC(toklev[j]))
						error("redeclaration of precedence of "+tokname);
					toklev[j] = SETASC(toklev[j], lev);
					toklev[j] = SETPLEV(toklev[j], i);
				}
				if(ty) {
					if(TYPE(toklev[j]))
						error("redeclaration of type of "+tokname);
					toklev[j] = SETTYPE(toklev[j],ty);
				}
				t = gettok();
				if(t == NUMBER) {
					tokset[j].value = numbval;
					t = gettok();
				}
				continue;
			}
			break;
		}

	LCURLY =>
		cpycode();
		t = gettok();

	* =>
		error("syntax error");
	}
	if(t == ENDFILE)
		error("unexpected EOF before %%");
	if(modname == nil)
		error("missing %module specification");

	moreprod();
	prdptr[0] = array[4] of {
		NTBASE,		# added production
		start,		# if start is 0, we will overwrite with the lhs of the first rule
		1,
		0
	};
	nprod = 1;
	curprod := array[RULEINC] of int;
	t = gettok();
	if(t != IDENTCOLON)
		error("bad syntax on first rule");

	if(!start)
		prdptr[0][1] = chfind(1, tokname);

	# read rules
	# put into prdptr array in the format
	# target
	# followed by id's of terminals and non-terminals
	# followd by -nprod
	while(t != MARK && t != ENDFILE) {
		mem := 0;
		# process a rule
		rlines[nprod] = lineno;
		if(t == '|')
			curprod[mem++] = prdptr[nprod-1][0];
		else if(t == IDENTCOLON) {
			curprod[mem] = chfind(1, tokname);
			if(curprod[mem] < NTBASE)
				error("token illegal on LHS of grammar rule");
			mem++;
		} else
			error("illegal rule: missing semicolon or | ?");

		# read rule body
		t = gettok();

		for(;;){
			while(t == IDENTIFIER) {
				curprod[mem] = chfind(1, tokname);
				if(curprod[mem] < NTBASE)
					levprd[nprod] = toklev[curprod[mem]];
				mem++;
				if(mem >= len curprod){
					ncurprod := array[mem+RULEINC] of int;
					ncurprod[0:] = curprod;
					curprod = ncurprod;
				}
				t = gettok();
			}
			if(t == PREC) {
				if(gettok() != IDENTIFIER)
					error("illegal %%prec syntax");
				j = chfind(2, tokname);
				if(j >= NTBASE)
					error("nonterminal "+nontrst[j-NTBASE].name+" illegal after %%prec");
				levprd[nprod] = toklev[j];
				t = gettok();
			}
			if(t != '=')
				break;
			levprd[nprod] |= ACTFLAG;
			addcode(CodeAct, "\n"+string nprod+"=>");
			cpyact(curprod, mem);

			# action within rule...
			if((t=gettok()) == IDENTIFIER) {
				# make it a nonterminal
				j = chfind(1, "$$"+string nprod);

				#
				# the current rule will become rule number nprod+1
				# enter null production for action
				#
				prdptr[nprod] = array[2] of {j, -nprod};

				# update the production information
				nprod++;
				moreprod();
				levprd[nprod] = levprd[nprod-1] & ~ACTFLAG;
				levprd[nprod-1] = ACTFLAG;
				rlines[nprod] = lineno;

				# make the action appear in the original rule
				curprod[mem++] = j;
				if(mem >= len curprod){
					ncurprod := array[mem+RULEINC] of int;
					ncurprod[0:] = curprod;
					curprod = ncurprod;
				}
			}
		}

		while(t == ';')
			t = gettok();
		curprod[mem++] = -nprod;

		# check that default action is reasonable
		if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[curprod[0]-NTBASE].value) {
			# no explicit action, LHS has value

			tempty := curprod[1];
			if(tempty < 0)
				error("must return a value, since LHS has a type");
			else
				if(tempty >= NTBASE)
					tempty = nontrst[tempty-NTBASE].value;
				else
					tempty = TYPE(toklev[tempty]);
			if(tempty != nontrst[curprod[0]-NTBASE].value)
				error("default action causes potential type clash");
			else{
				addcodec(CodeAct, '\n');
				addcode(CodeAct, string nprod);
				addcode(CodeAct, "=>\ne.yyval.");
				addcode(CodeAct, typeset[tempty]);
				addcode(CodeAct, " = yys[yyp+1].yyv.");
				addcode(CodeAct, typeset[tempty]);
				addcodec(CodeAct, ';');
			}
		}
		moreprod();
		prdptr[nprod] = array[mem] of int;
		prdptr[nprod][0:] = curprod[:mem];
		nprod++;
		moreprod();
		levprd[nprod] = 0;
	}

	#
	# end of all rules
	# dump out the prefix code
	#
	ftable.puts("implement ");
	ftable.puts(modname);
	ftable.puts(";\n");

	dumpcode(CodeMod);
	dumpmod();
	dumpcode(CodeAct);

	ftable.puts("YYEOFCODE: con 1;\n");
	ftable.puts("YYERRCODE: con 2;\n");
	ftable.puts("YYMAXDEPTH: con 200;\n");	# was 150
#	ftable.puts("yyval: YYSTYPE;\n");

	#
	# copy any postfix code
	#
	if(t == MARK) {
		ftable.puts("\n#line\t");
		ftable.puts(string lineno);
		ftable.puts("\t\"");
		ftable.puts(infile);
		ftable.puts("\"\n");
		while((c=finput.getc()) != Bufio->EOF)
			ftable.putc(c);
	}
	finput.close();
}

#
# allocate enough room to hold another production
#
moreprod()
{
	n := len prdptr;
	if(nprod < n)
		return;
	n += PRODINC;
	aprod := array[n] of array of int;
	aprod[0:] = prdptr;
	prdptr = aprod;

	alevprd := array[n] of int;
	alevprd[0:] = levprd;
	levprd = alevprd;

	arlines := array[n] of int;
	arlines[0:] = rlines;
	rlines = arlines;
}

#
# define s to be a terminal if t=0
# or a nonterminal if t=1
#
defin(nt: int, s: string): int
{
	val := 0;
	if(nt) {
		nnonter++;
		if(nnonter >= len nontrst){
			anontrst := array[nnonter + SYMINC] of Symb;
			anontrst[0:] = nontrst;
			nontrst = anontrst;
		}
		nontrst[nnonter] = Symb(s, 0);
		return NTBASE + nnonter;
	}

	# must be a token
	ntokens++;
	if(ntokens >= len tokset){
		atokset := array[ntokens + SYMINC] of Symb;
		atokset[0:] = tokset;
		tokset = atokset;

		atoklev := array[ntokens + SYMINC] of int;
		atoklev[0:] = toklev;
		toklev = atoklev;
	}
	tokset[ntokens].name = s;
	toklev[ntokens] = 0;

	# establish value for token
	# single character literal
	if(s[0] == ' ' && len s == 1+1){
		val = s[1];
	}else if(s[0] == ' ' && s[1] == '\\') { # escape sequence
		if(len s == 2+1) {
			# single character escape sequence
			case s[2] {
			'\'' =>	val = '\'';
			'"' =>	val = '"';
			'\\' =>	val = '\\';
			'a' =>	val = '\a';
			'b' =>	val = '\b';
			'n' =>	val = '\n';
			'r' =>	val = '\r';
			't' =>	val = '\t';
			'v' =>	val = '\v';
			* =>
				error("invalid escape "+s[1:3]);
			}
		}else if(s[2] == 'u' && len s == 2+1+4) { # \unnnn sequence
			val = 0;
			s = s[3:];
			while(s != ""){
				c := s[0];
				if(c >= '0' && c <= '9')
					c -= '0';
				else if(c >= 'a' && c <= 'f')
					c -= 'a' - 10;
				else if(c >= 'A' && c <= 'F')
					c -= 'A' - 10;
				else
					error("illegal \\unnnn construction");
				val = val * 16 + c;
				s = s[1:];
			}
			if(val == 0)
				error("'\\u0000' is illegal");
		}else
			error("unknown escape");
	}else
		val = extval++;

	tokset[ntokens].value = val;
	return ntokens;
}

peekline := 0;
gettok(): int
{
	i, match, c: int;

	tokname = "";
	for(;;){
		reserve := 0;
		lineno += peekline;
		peekline = 0;
		c = finput.getc();
		while(c == ' ' || c == '\n' || c == '\t' || c == '\v' || c == '\r') {
			if(c == '\n')
				lineno++;
			c = finput.getc();
		}

		# skip comment
		if(c != '#')
			break;
		lineno += skipcom();
	}
	case c {
	Bufio->EOF =>
		return ENDFILE;

	'{' =>
		finput.ungetc();
		return '=';

	'<' =>
		# get, and look up, a type name (union member name)
		i = 0;
		while((c=finput.getc()) != '>' && c != Bufio->EOF && c != '\n')
			tokname[i++] = c;
		if(c != '>')
			error("unterminated < ... > clause");
		for(i=1; i<=ntypes; i++)
			if(typeset[i] == tokname) {
				numbval = i;
				return TYPENAME;
			}
		ntypes++;
		numbval = ntypes;
		typeset[numbval] = tokname;
		return TYPENAME;

	'"' or '\'' =>
		match = c;
		tokname[0] = ' ';
		i = 1;
		for(;;) {
			c = finput.getc();
			if(c == '\n' || c == Bufio->EOF)
				error("illegal or missing ' or \"" );
			if(c == '\\') {
				tokname[i++] = '\\';
				c = finput.getc();
			} else if(c == match)
				return IDENTIFIER;
			tokname[i++] = c;
		}

	'%' =>
		case c = finput.getc(){
		'%' =>	return MARK;
		'=' =>	return PREC;
		'{' =>	return LCURLY;
		}

		getword(c);
		# find a reserved word
		for(c=0; c < len resrv; c++)
			if(tokname == resrv[c].name)
				return resrv[c].value;
		error("invalid escape, or illegal reserved word: "+tokname);

	'0' to '9' =>
		numbval = c - '0';
		while(isdigit(c = finput.getc()))
			numbval = numbval*10 + c-'0';
		finput.ungetc();
		return NUMBER;

	* =>
		if(isword(c) || c=='.' || c=='$')
			getword(c);
		else
			return c;
	}

	# look ahead to distinguish IDENTIFIER from IDENTCOLON
	c = finput.getc();
	while(c == ' ' || c == '\t'|| c == '\n' || c == '\v' || c == '\r' || c == '#') {
		if(c == '\n')
			peekline++;
		# look for comments
		if(c == '#')
			peekline += skipcom();
		c = finput.getc();
	}
	if(c == ':')
		return IDENTCOLON;
	finput.ungetc();
	return IDENTIFIER;
}

getword(c: int)
{
	i := 0;
	while(isword(c) || isdigit(c) || c == '_' || c=='.' || c=='$') {
		tokname[i++] = c;
		c = finput.getc();
	}
	finput.ungetc();
}

#
# determine the type of a symbol
#
fdtype(t: int): int
{
	v : int;
	s: string;

	if(t >= NTBASE) {
		v = nontrst[t-NTBASE].value;
		s = nontrst[t-NTBASE].name;
	} else {
		v = TYPE(toklev[t]);
		s = tokset[t].name;
	}
	if(v <= 0)
		error("must specify type for "+s);
	return v;
}

chfind(t: int, s: string): int
{
	if(s[0] == ' ')
		t = 0;
	for(i:=0; i<=ntokens; i++)
		if(s == tokset[i].name)
			return i;
	for(i=0; i<=nnonter; i++)
		if(s == nontrst[i].name)
			return NTBASE+i;

	# cannot find name
	if(t > 1)
		error(s+" should have been defined earlier");
	return defin(t, s);
}

#
# saves module definition in Code
#
cpymodule()
{
	if(gettok() != IDENTIFIER)
		error("bad %%module construction");
	if(modname != nil)
		error("duplicate %%module construction");
	modname = tokname;

	level := 0;
	for(;;) {
		if((c:=finput.getc()) == Bufio->EOF)
			error("EOF encountered while processing %%module");
		case c {
		'\n' =>
			lineno++;
		'{' =>
			level++;
			if(level == 1)
				continue;
		'}' =>
			level--;

			# we are finished copying
			if(level == 0)
				return;
		}
		addcodec(CodeMod, c);
	}
}

#
# saves code between %{ and %}
#
cpycode()
{
	c := finput.getc();
	if(c == '\n') {
		c = finput.getc();
		lineno++;
	}
	addcode(CodeData, "\n#line\t" + string lineno + "\t\"" + infile + "\"\n");
	while(c != Bufio->EOF) {
		if(c == '%') {
			if((c=finput.getc()) == '}')
				return;
			addcodec(CodeData, '%');
		}
		addcodec(CodeData, c);
		if(c == '\n')
			lineno++;
		c = finput.getc();
	}
	error("eof before %%}");
}

addcode(k: int, s: string)
{
	for(i := 0; i < len s; i++)
		addcodec(k, s[i]);
}

addcodec(k, c: int)
{
	if(codehead == nil
	|| k != codetail.kind
	|| codetail.ndata >= NCode){
		cd := ref Code(k, array[NCode+UTFmax] of byte, 0, nil);
		if(codehead == nil)
			codehead = cd;
		else
			codetail.next = cd;
		codetail = cd;
	}

	codetail.ndata += sys->char2byte(c, codetail.data, codetail.ndata);
}

dumpcode(til: int)
{
	for(; codehead != nil; codehead = codehead.next){
		if(codehead.kind == til)
			return;
		if(ftable.write(codehead.data, codehead.ndata) != codehead.ndata)
			error("can't write output file");
	}
}

#
# write out the module declaration and any token info
#
dumpmod()
{
	if(fdefine != nil) {
		fdefine.puts(modname);
		fdefine.puts(": module {\n");
	}
	ftable.puts(modname);
	ftable.puts(": module {\n");

	for(; codehead != nil; codehead = codehead.next){
		if(codehead.kind != CodeMod)
			break;
		if(ftable.write(codehead.data, codehead.ndata) != codehead.ndata)
			error("can't write output file");
		if(fdefine != nil && fdefine.write(codehead.data, codehead.ndata) != codehead.ndata)
			error("can't write define file");
	}

	for(i:=TOKSTART; i<=ntokens; i++) {
		# non-literals
		c := tokset[i].name[0];
		if(c != ' ' && c != '$') {
			s := tokset[i].name+": con	"+string tokset[i].value+";\n";
			ftable.puts(s);
			if(fdefine != nil)
				fdefine.puts(s);
		}
	}

	if(fdefine != nil)
		fdefine.puts("};\n");
	ftable.puts("\n};\n");

	if(fdebug != nil) {
		fdebug.puts("yytoknames = array[] of {\n");
		for(i=1; i<=ntokens; i++) {
			if(tokset[i].name != nil)
				fdebug.puts("\t\""+chcopy(tokset[i].name)+"\",\n");
			else
				fdebug.puts("\t\"\",\n");
		}
		fdebug.puts("};\n");
	}
}

#
# skip over comments
# skipcom is called after reading a '#'
#
skipcom(): int
{
	c := finput.getc();
	while(c != Bufio->EOF) {
		if(c == '\n')
			return 1;
		c = finput.getc();
	}
	error("EOF inside comment");
	return 0;
}

#
# copy limbo action to the next ; or closing }
#
cpyact(curprod: array of int, max: int)
{
	addcode(CodeAct, "\n#line\t");
	addcode(CodeAct, string lineno);
	addcode(CodeAct, "\t\"");
	addcode(CodeAct, infile);
	addcode(CodeAct, "\"\n");

	brac := 0;

loop:	for(;;){
		c := finput.getc();
	swt:	case c {
		';' =>
			if(brac == 0) {
				addcodec(CodeAct, c);
				return;
			}
	
		'{' =>
			brac++;
			
		'$' =>
			s := 1;
			tok := -1;
			c = finput.getc();
	
			# type description
			if(c == '<') {
				finput.ungetc();
				if(gettok() != TYPENAME)
					error("bad syntax on $<ident> clause");
				tok = numbval;
				c = finput.getc();
			}
			if(c == '$') {
				addcode(CodeAct, "e.yyval");
	
				# put out the proper tag...
				if(ntypes) {
					if(tok < 0)
						tok = fdtype(curprod[0]);
					addcode(CodeAct, "."+typeset[tok]);
				}
				continue loop;
			}
			if(c == '-') {
				s = -s;
				c = finput.getc();
			}
			j := 0;
			if(isdigit(c)) {
				while(isdigit(c)) {
					j = j*10 + c-'0';
					c = finput.getc();
				}
				finput.ungetc();
				j = j*s;
				if(j >= max)
					error("Illegal use of $" + string j);
			}else if(isword(c) || c == '_' || c == '.') {
				# look for $name
				finput.ungetc();
				if(gettok() != IDENTIFIER)
					error("$ must be followed by an identifier");
				tokn := chfind(2, tokname);
				fnd := -1;
				if((c = finput.getc()) != '@')
					finput.ungetc();
				else if(gettok() != NUMBER)
					error("@ must be followed by number");
				else
					fnd = numbval;
				for(j=1; j<max; j++){
					if(tokn == curprod[j]) {
						fnd--;
						if(fnd <= 0)
							break;
					}
				}
				if(j >= max)
					error("$name or $name@number not found");
			}else{
				addcodec(CodeAct, '$');
				if(s < 0)
					addcodec(CodeAct, '-');
				finput.ungetc();
				continue loop;
			}
			addcode(CodeAct, "yys[yypt-" + string(max-j-1) + "].yyv");
	
			# put out the proper tag
			if(ntypes) {
				if(j <= 0 && tok < 0)
					error("must specify type of $" + string j);
				if(tok < 0)
					tok = fdtype(curprod[j]);
				addcodec(CodeAct, '.');
				addcode(CodeAct, typeset[tok]);
			}
			continue loop;

		'}' =>
			brac--;
			if(brac)
				break;
			addcodec(CodeAct, c);
			return;

		'#' =>
			# a comment
			addcodec(CodeAct, c);
			c = finput.getc();
			while(c != Bufio->EOF) {
				if(c == '\n') {
					lineno++;
					break swt;
				}
				addcodec(CodeAct, c);
				c = finput.getc();
			}
			error("EOF inside comment");

		'\''or '"' =>
			# character string or constant
			match := c;
			addcodec(CodeAct, c);
			while(c = finput.getc()) {
				if(c == '\\') {
					addcodec(CodeAct, c);
					c = finput.getc();
					if(c == '\n')
						lineno++;
				} else if(c == match)
					break swt;
				if(c == '\n')
					error("newline in string or char const.");
				addcodec(CodeAct, c);
			}
			error("EOF in string or character constant");

		Bufio->EOF =>
			error("action does not terminate");

		'\n' =>
			lineno++;
		}

		addcodec(CodeAct, c);
	}
}

openup(stem: string, dflag, vflag, ytab: int, ytabc: string)
{
	buf: string;
	if(vflag) {
		buf = stem + "." + FILEU;
		foutput = bufio->create(buf, Bufio->OWRITE, 8r666);
		if(foutput == nil)
			error("can't create " + buf);
	}
	if(yydebug != nil) {
		buf = stem + "." + FILEDEBUG;
		fdebug = bufio->create(buf, Bufio->OWRITE, 8r666);
		if(fdebug == nil)
			error("can't create " + buf);
	}
	if(dflag) {
		buf = stem + "." + FILED;
		fdefine = bufio->create(buf, Bufio->OWRITE, 8r666);
		if(fdefine == nil)
			error("can't create " + buf);
	}
	if(ytab == 0)
		buf = stem + "." + OFILE;
	else
		buf = ytabc;
	ftable = bufio->create(buf, Bufio->OWRITE, 8r666);
	if(ftable == nil)
		error("can't create file " + buf);
}

#
# return a pointer to the name of symbol i
#
symnam(i: int): string
{
	s: string;
	if(i >= NTBASE)
		s = nontrst[i-NTBASE].name;
	else
		s = tokset[i].name;
	if(s[0] == ' ')
		s = s[1:];
	return s;
}

#
# write out error comment
#
error(s: string)
{
	nerrors++;
	fprint(stderr, "\n fatal error: %s, %s:%d\n", s, infile, lineno);
	if(!fatfl)
		return;
	summary();
	exit;
#	exits("error");
}

#
# set elements 0 through n-1 to c
#
aryfil(v: array of int, n, c: int)
{
	for(i:=0; i<n; i++)
		v[i] = c;
}

#
# compute an array with the beginnings of productions yielding given nonterminals
# The array pres points to these lists
# the array pyield has the lists: the total size is only NPROD+1
#
cpres()
{
	pres = array[nnonter+1] of array of array of int;
	curres := array[nprod] of array of int;
	for(i:=0; i<=nnonter; i++) {
		n := 0;
		c := i+NTBASE;
		fatfl = 0;  	# make undefined symbols nonfatal
		for(j:=0; j<nprod; j++)
			if(prdptr[j][0] == c)
				curres[n++] = prdptr[j][1:];
		if(n == 0)
			error("nonterminal " + nontrst[i].name + " not defined!");
		else{
			pres[i] = array[n] of array of int;
			pres[i][0:] = curres[:n];
		}
	}
	fatfl = 1;
	if(nerrors) {
		summary();
		exit;		#exits("error");
	}
}

dumppres()
{
	for(i := 0; i <= nnonter; i++){
		print("nonterm %d\n", i);
		curres := pres[i];
		for(j := 0; j < len curres; j++){
			print("\tproduction %d:", j);
			prd := curres[j];
			for(k := 0; k < len prd; k++)
				print(" %d", prd[k]);
			print("\n");
		}
	}
}

#
# mark nonterminals which derive the empty string
# also, look for nonterminals which don't derive any token strings
#
cempty()
{
	i, p, np: int;
	prd: array of int;

	pempty = array[nnonter+1] of int;

	# first, use the array pempty to detect productions that can never be reduced
	# set pempty to WHONOWS
	aryfil(pempty, nnonter+1, WHOKNOWS);

	# now, look at productions, marking nonterminals which derive something
more:	for(;;){
		for(i=0; i<nprod; i++) {
			prd = prdptr[i];
			if(pempty[prd[0] - NTBASE])
				continue;
			np = len prd - 1;
			for(p = 1; p < np; p++)
				if(prd[p] >= NTBASE && pempty[prd[p]-NTBASE] == WHOKNOWS)
					break;
			# production can be derived
			if(p == np) {
				pempty[prd[0]-NTBASE] = OK;
				continue more;
			}
		}
		break;
	}

	# now, look at the nonterminals, to see if they are all OK
	for(i=0; i<=nnonter; i++) {
		# the added production rises or falls as the start symbol ...
		if(i == 0)
			continue;
		if(pempty[i] != OK) {
			fatfl = 0;
			error("nonterminal " + nontrst[i].name + " never derives any token string");
		}
	}

	if(nerrors) {
		summary();
		exit;		#exits("error");
	}

	# now, compute the pempty array, to see which nonterminals derive the empty string
	# set pempty to WHOKNOWS
	aryfil(pempty, nnonter+1, WHOKNOWS);

	# loop as long as we keep finding empty nonterminals

again:	for(;;){
	next:	for(i=1; i<nprod; i++) {
			# not known to be empty
			prd = prdptr[i];
			if(pempty[prd[0]-NTBASE] != WHOKNOWS)
				continue;
			np = len prd - 1;
			for(p = 1; p < np; p++)
				if(prd[p] < NTBASE || pempty[prd[p]-NTBASE] != EMPTY)
					continue next;

			# we have a nontrivially empty nonterminal
			pempty[prd[0]-NTBASE] = EMPTY;
			# got one ... try for another
			continue again;
		}
		return;
	}
}

dumpempty()
{
	for(i := 0; i <= nnonter; i++)
		if(pempty[i] == EMPTY)
			print("non-term %d %s matches empty\n", i, symnam(i+NTBASE));
}

#
# compute an array with the first of nonterminals
#
cpfir()
{
	s, n, p, np, ch: int;
	curres: array of array of int;
	prd: array of int;

	wsets = array[nnonter+WSETINC] of Wset;
	pfirst = array[nnonter+1] of Lkset;
	for(i:=0; i<=nnonter; i++) {
		wsets[i].ws = mkset();
		pfirst[i] = mkset();
		curres = pres[i];
		n = len curres;
		# initially fill the sets
		for(s = 0; s < n; s++) {
			prd = curres[s];
			np = len prd - 1;
			for(p = 0; p < np; p++) {
				ch = prd[p];
				if(ch < NTBASE) {
					setbit(pfirst[i], ch);
					break;
				}
				if(!pempty[ch-NTBASE])
					break;
			}
		}
	}

	# now, reflect transitivity
	changes := 1;
	while(changes) {
		changes = 0;
		for(i=0; i<=nnonter; i++) {
			curres = pres[i];
			n = len curres;
			for(s = 0; s < n; s++) {
				prd = curres[s];
				np = len prd - 1;
				for(p = 0; p < np; p++) {
					ch = prd[p] - NTBASE;
					if(ch < 0)
						break;
					changes |= setunion(pfirst[i], pfirst[ch]);
					if(!pempty[ch])
						break;
				}
			}
		}
	}

	if(!indebug)
		return;
	if(foutput != nil){
		for(i=0; i<=nnonter; i++) {
			foutput.putc('\n');
			foutput.puts(nontrst[i].name);
			foutput.puts(": ");
			prlook(pfirst[i]);
			foutput.putc(' ');
			foutput.puts(string pempty[i]);
			foutput.putc('\n');
		}
	}
}

#
# generate the states
#
stagen()
{
	# initialize
	nstate = 0;
	tstates = array[ntokens+1] of {* => 0};	# states generated by terminal gotos
	ntstates = array[nnonter+1] of {* => 0};# states generated by nonterminal gotos
	amem = array[ACTSIZE] of {* => 0};
	memp = 0;

	clset = mkset();
	pstate[0] = pstate[1] = 0;
	aryfil(clset, tbitset, 0);
	putitem(Pitem(prdptr[0], 0, 0, 0), clset);
	tystate[0] = MUSTDO;
	nstate = 1;
	pstate[2] = pstate[1];

	#
	# now, the main state generation loop
	# first pass generates all of the states
	# later passes fix up lookahead
	# could be sped up a lot by remembering
	# results of the first pass rather than recomputing
	#
	first := 1;
	for(more := 1; more; first = 0){
		more = 0;
		for(i:=0; i<nstate; i++) {
			if(tystate[i] != MUSTDO)
				continue;

			tystate[i] = DONE;
			aryfil(temp1, nnonter+1, 0);

			# take state i, close it, and do gotos
			closure(i);

			# generate goto's
			for(p:=0; p<cwp; p++) {
				pi := wsets[p];
				if(pi.flag)
					continue;
				wsets[p].flag = 1;
				c := pi.pitem.first;
				if(c <= 1) {
					if(pstate[i+1]-pstate[i] <= p)
						tystate[i] = MUSTLOOKAHEAD;
					continue;
				}
				# do a goto on c
				putitem(wsets[p].pitem, wsets[p].ws);
				for(q:=p+1; q<cwp; q++) {
					# this item contributes to the goto
					if(c == wsets[q].pitem.first) {
						putitem(wsets[q].pitem, wsets[q].ws);
						wsets[q].flag = 1;
					}
				}

				if(c < NTBASE)
					state(c);	# register new state
				else
					temp1[c-NTBASE] = state(c);
			}

			if(gsdebug && foutput != nil) {
				foutput.puts(string i + ": ");
				for(j:=0; j<=nnonter; j++)
					if(temp1[j])
						foutput.puts(nontrst[j].name + " " + string temp1[j] + ", ");
				foutput.putc('\n');
			}

			if(first)
				indgo[i] = apack(temp1[1:], nnonter-1) - 1;

			more++;
		}
	}
}

#
# generate the closure of state i
#
closure(i: int)
{
	zzclose++;

	# first, copy kernel of state i to wsets
	cwp = 0;
	q := pstate[i+1];
	for(p:=pstate[i]; p<q; p++) {
		wsets[cwp].pitem = statemem[p].pitem;
		wsets[cwp].flag = 1;			# this item must get closed
		wsets[cwp].ws[0:] = statemem[p].look;
		cwp++;
	}

	# now, go through the loop, closing each item
	work := 1;
	while(work) {
		work = 0;
		for(u:=0; u<cwp; u++) {
			if(wsets[u].flag == 0)
				continue;
			# dot is before c
			c := wsets[u].pitem.first;
			if(c < NTBASE) {
				wsets[u].flag = 0;
				# only interesting case is where . is before nonterminal
				continue;
			}

			# compute the lookahead
			aryfil(clset, tbitset, 0);

			# find items involving c
			for(v:=u; v<cwp; v++) {
				if(wsets[v].flag != 1
				|| wsets[v].pitem.first != c)
					continue;
				pi := wsets[v].pitem.prod;
				ipi := wsets[v].pitem.off + 1;
				
				wsets[v].flag = 0;
				if(nolook)
					continue;
				while((ch := pi[ipi++]) > 0) {
					# terminal symbol
					if(ch < NTBASE) {
						setbit(clset, ch);
						break;
					}
					# nonterminal symbol
					setunion(clset, pfirst[ch-NTBASE]);
					if(!pempty[ch-NTBASE])
						break;
				}
				if(ch <= 0)
					setunion(clset, wsets[v].ws);
			}

			#
			# now loop over productions derived from c
			#
			curres := pres[c - NTBASE];
			n := len curres;
			# initially fill the sets
	nexts:		for(s := 0; s < n; s++) {
				prd := curres[s];
				#
				# put these items into the closure
				# is the item there
				#
				for(v=0; v<cwp; v++) {
					# yes, it is there
					if(wsets[v].pitem.off == 0
					&& wsets[v].pitem.prod == prd) {
						if(!nolook && setunion(wsets[v].ws, clset))
							wsets[v].flag = work = 1;
						continue nexts;
					}
				}

				#  not there; make a new entry
				if(cwp >= len wsets){
					awsets := array[cwp + WSETINC] of Wset;
					awsets[0:] = wsets;
					wsets = awsets;
				}
				wsets[cwp].pitem = Pitem(prd, 0, prd[0], -prd[len prd-1]);
				wsets[cwp].flag = 1;
				wsets[cwp].ws = mkset();
				if(!nolook) {
					work = 1;
					wsets[cwp].ws[0:] = clset;
				}
				cwp++;
			}
		}
	}

	# have computed closure; flags are reset; return
	if(cldebug && foutput != nil) {
		foutput.puts("\nState " + string i + ", nolook = " + string nolook + "\n");
		for(u:=0; u<cwp; u++) {
			if(wsets[u].flag)
				foutput.puts("flag set!\n");
			wsets[u].flag = 0;
			foutput.putc('\t');
			foutput.puts(writem(wsets[u].pitem));
			prlook(wsets[u].ws);
			foutput.putc('\n');
		}
	}
}

#
# sorts last state,and sees if it equals earlier ones. returns state number
#
state(c: int): int
{
	zzstate++;
	p1 := pstate[nstate];
	p2 := pstate[nstate+1];
	if(p1 == p2)
		return 0;	# null state
	# sort the items
	k, l: int;
	for(k = p1+1; k < p2; k++) {	# make k the biggest
		for(l = k; l > p1; l--) {
			if(statemem[l].pitem.prodno < statemem[l-1].pitem.prodno
			|| statemem[l].pitem.prodno == statemem[l-1].pitem.prodno
			&& statemem[l].pitem.off < statemem[l-1].pitem.off) {
				s := statemem[l];
				statemem[l] = statemem[l-1];
				statemem[l-1] = s;
			}else
				break;
		}
	}

	size1 := p2 - p1;	# size of state

	if(c >= NTBASE)
		i := ntstates[c-NTBASE];
	else
		i = tstates[c];

look:	for(; i != 0; i = mstates[i]) {
		# get ith state
		q1 := pstate[i];
		q2 := pstate[i+1];
		size2 := q2 - q1;
		if(size1 != size2)
			continue;
		k = p1;
		for(l = q1; l < q2; l++) {
			if(statemem[l].pitem.prod != statemem[k].pitem.prod
			|| statemem[l].pitem.off != statemem[k].pitem.off)
				continue look;
			k++;
		}

		# found it
		pstate[nstate+1] = pstate[nstate];	# delete last state
		# fix up lookaheads
		if(nolook)
			return i;
		k = p1;
		for(l = q1; l < q2; l++) {
			if(setunion(statemem[l].look, statemem[k].look))
				tystate[i] = MUSTDO;
			k++;
		}
		return i;
	}
	# state is new
	zznewstate++;
	if(nolook)
		error("yacc state/nolook error");
	pstate[nstate+2] = p2;
	if(nstate+1 >= NSTATES)
		error("too many states");
	if(c >= NTBASE) {
		mstates[nstate] = ntstates[c-NTBASE];
		ntstates[c-NTBASE] = nstate;
	} else {
		mstates[nstate] = tstates[c];
		tstates[c] = nstate;
	}
	tystate[nstate] = MUSTDO;
	return nstate++;
}

putitem(p: Pitem, set: Lkset)
{
	p.off++;
	p.first = p.prod[p.off];

	if(pidebug && foutput != nil)
		foutput.puts("putitem(" + writem(p) + "), state " + string nstate + "\n");
	j := pstate[nstate+1];
	if(j >= len statemem){
		asm := array[j + STATEINC] of Item;
		asm[0:] = statemem;
		statemem = asm;
	}
	statemem[j].pitem = p;
	if(!nolook){
		s := mkset();
		s[0:] = set;
		statemem[j].look = s;
	}
	j++;
	pstate[nstate+1] = j;
}

#
# creates output string for item pointed to by pp
#
writem(pp: Pitem): string
{
	i: int;
	p := pp.prod;
	q := chcopy(nontrst[prdptr[pp.prodno][0]-NTBASE].name) + ": ";
	npi := pp.off;
	pi := p == prdptr[pp.prodno];
	for(;;){
		c := ' ';
		if(pi == npi)
			c = '.';
		q[len q] = c;
		i = p[pi++];
		if(i <= 0)
			break;
		q += chcopy(symnam(i));
	}

	# an item calling for a reduction
	i = p[npi];
	if(i < 0)
		q += "    (" + string -i + ")";
	return q;
}

#
# pack state i from temp1 into amem
#
apack(p: array of int, n: int): int
{
	#
	# we don't need to worry about checking because
	# we will only look at entries known to be there...
	# eliminate leading and trailing 0's
	#
	off := 0;
	for(pp := 0; pp <= n && p[pp] == 0; pp++)
		off--;
 	# no actions
	if(pp > n)
		return 0;
	for(; n > pp && p[n] == 0; n--)
		;
	p = p[pp:n+1];

	# now, find a place for the elements from p to q, inclusive
	r := len amem - len p;
nextk:	for(rr := 0; rr <= r; rr++) {
		qq := rr;
		for(pp = 0; pp < len p; pp++) {
			if(p[pp] != 0)
				if(p[pp] != amem[qq] && amem[qq] != 0)
					continue nextk;
			qq++;
		}

		# we have found an acceptable k
		if(pkdebug && foutput != nil)
			foutput.puts("off = " + string(off+rr) + ", k = " + string rr + "\n");
		qq = rr;
		for(pp = 0; pp < len p; pp++) {
			if(p[pp]) {
				if(qq > memp)
					memp = qq;
				amem[qq] = p[pp];
			}
			qq++;
		}
		if(pkdebug && foutput != nil) {
			for(pp = 0; pp <= memp; pp += 10) {
				foutput.putc('\t');
				for(qq = pp; qq <= pp+9; qq++)
					foutput.puts(string amem[qq] + " ");
				foutput.putc('\n');
			}
		}
		return off + rr;
	}
	error("no space in action table");
	return 0;
}

#
# print the output for the states
#
output()
{
	c, u, v: int;

	ftable.puts("yyexca := array[] of {");
	if(fdebug != nil)
		fdebug.puts("yystates = array [] of {\n");

	noset := mkset();

	# output the stuff for state i
	for(i:=0; i<nstate; i++) {
		nolook = tystate[i]!=MUSTLOOKAHEAD;
		closure(i);

		# output actions
		nolook = 1;
		aryfil(temp1, ntokens+nnonter+1, 0);
		for(u=0; u<cwp; u++) {
			c = wsets[u].pitem.first;
			if(c > 1 && c < NTBASE && temp1[c] == 0) {
				for(v=u; v<cwp; v++)
					if(c == wsets[v].pitem.first)
						putitem(wsets[v].pitem, noset);
				temp1[c] = state(c);
			} else
				if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
					temp1[c+ntokens] = amem[indgo[i]+c];
		}
		if(i == 1)
			temp1[1] = ACCEPTCODE;

		# now, we have the shifts; look at the reductions
		lastred = 0;
		for(u=0; u<cwp; u++) {
			c = wsets[u].pitem.first;

			# reduction
			if(c > 0)
				continue;
			lastred = -c;
			us := wsets[u].ws;
			for(k:=0; k<=ntokens; k++) {
				if(!bitset(us, k))
					continue;
				if(temp1[k] == 0)
					temp1[k] = c;
				else
				if(temp1[k] < 0) { # reduce/reduce conflict
					if(foutput != nil)
						foutput.puts(
							"\n" + string i + ": reduce/reduce conflict  (red'ns "
							+ string -temp1[k] + " and " + string lastred + " ) on " + symnam(k));
					if(-temp1[k] > lastred)
						temp1[k] = -lastred;
					zzrrconf++;
				} else
					# potential shift/reduce conflict
					precftn(lastred, k, i);
			}
		}
		wract(i);
	}

	if(fdebug != nil)
		fdebug.puts("};\n");
	ftable.puts("};\n");
	ftable.puts("YYNPROD: con " + string nprod + ";\n");
	ftable.puts("YYPRIVATE: con " + string PRIVATE + ";\n");
	ftable.puts("yytoknames: array of string;\n");
	ftable.puts("yystates: array of string;\n");
	if(yydebug != nil){
		ftable.puts("include \"y.debug\";\n");
		ftable.puts("yydebug: con " + yydebug + ";\n");
	}else{
		ftable.puts("yydebug: con 0;\n");
	}
}

#
# decide a shift/reduce conflict by precedence.
# r is a rule number, t a token number
# the conflict is in state s
# temp1[t] is changed to reflect the action
#
precftn(r, t, s: int)
{
	action: int;

	lp := levprd[r];
	lt := toklev[t];
	if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {

		# conflict
		if(foutput != nil)
			foutput.puts(
				"\n" + string s + ": shift/reduce conflict (shift "
				+ string temp1[t] + "(" + string PLEVEL(lt) + "), red'n "
				+ string r + "(" + string PLEVEL(lp) + ")) on " + symnam(t));
		zzsrconf++;
		return;
	}
	if(PLEVEL(lt) == PLEVEL(lp))
		action = ASSOC(lt);
	else if(PLEVEL(lt) > PLEVEL(lp))
		action = RASC;  # shift
	else
		action = LASC;  # reduce
	case action{
	BASC =>  # error action
		temp1[t] = ERRCODE;
	LASC =>  # reduce
		temp1[t] = -r;
	}
}

#
# output state i
# temp1 has the actions, lastred the default
#
wract(i: int)
{
	p, p1: int;

	# find the best choice for lastred
	lastred = 0;
	ntimes := 0;
	for(j:=0; j<=ntokens; j++) {
		if(temp1[j] >= 0)
			continue;
		if(temp1[j]+lastred == 0)
			continue;
		# count the number of appearances of temp1[j]
		count := 0;
		tred := -temp1[j];
		levprd[tred] |= REDFLAG;
		for(p=0; p<=ntokens; p++)
			if(temp1[p]+tred == 0)
				count++;
		if(count > ntimes) {
			lastred = tred;
			ntimes = count;
		}
	}

	#
	# for error recovery, arrange that, if there is a shift on the
	# error recovery token, `error', that the default be the error action
	#
	if(temp1[2] > 0)
		lastred = 0;

	# clear out entries in temp1 which equal lastred
	# count entries in optst table
	n := 0;
	for(p=0; p<=ntokens; p++) {
		p1 = temp1[p];
		if(p1+lastred == 0)
			temp1[p] = p1 = 0;
		if(p1 > 0 && p1 != ACCEPTCODE && p1 != ERRCODE)
			n++;
	}

	wrstate(i);
	defact[i] = lastred;
	flag := 0;
	os := array[n*2] of int;
	n = 0;
	for(p=0; p<=ntokens; p++) {
		if((p1=temp1[p]) != 0) {
			if(p1 < 0) {
				p1 = -p1;
			} else if(p1 == ACCEPTCODE) {
				p1 = -1;
			} else if(p1 == ERRCODE) {
				p1 = 0;
			} else {
				os[n++] = p;
				os[n++] = p1;
				zzacent++;
				continue;
			}
			if(flag++ == 0)
				ftable.puts("-1, " + string i + ",\n");
			ftable.puts("\t" + string p + ", " + string p1 + ",\n");
			zzexcp++;
		}
	}
	if(flag) {
		defact[i] = -2;
		ftable.puts("\t-2, " + string lastred + ",\n");
	}
	optst[i] = os;
}

#
# writes state i
#
wrstate(i: int)
{
	j0, j1, u: int;
	pp, qq: int;

	if(fdebug != nil) {
		if(lastred) {
			fdebug.puts("	nil, #" + string i + "\n");
		} else {
			fdebug.puts("	\"");
			qq = pstate[i+1];
			for(pp=pstate[i]; pp<qq; pp++){
				fdebug.puts(writem(statemem[pp].pitem));
				fdebug.puts("\\n");
			}
			if(tystate[i] == MUSTLOOKAHEAD)
				for(u = pstate[i+1] - pstate[i]; u < cwp; u++)
					if(wsets[u].pitem.first < 0){
						fdebug.puts(writem(wsets[u].pitem));
						fdebug.puts("\\n");
					}
			fdebug.puts("\", #" + string i + "/\n");
		}
	}
	if(foutput == nil)
		return;
	foutput.puts("\nstate " + string i + "\n");
	qq = pstate[i+1];
	for(pp=pstate[i]; pp<qq; pp++){
		foutput.putc('\t');
		foutput.puts(writem(statemem[pp].pitem));
		foutput.putc('\n');
	}
	if(tystate[i] == MUSTLOOKAHEAD) {
		# print out empty productions in closure
		for(u = pstate[i+1] - pstate[i]; u < cwp; u++) {
			if(wsets[u].pitem.first < 0) {
				foutput.putc('\t');
				foutput.puts(writem(wsets[u].pitem));
				foutput.putc('\n');
			}
		}
	}

	# check for state equal to another
	for(j0=0; j0<=ntokens; j0++)
		if((j1=temp1[j0]) != 0) {
			foutput.puts("\n\t" + symnam(j0) + "  ");
			# shift, error, or accept
			if(j1 > 0) {
				if(j1 == ACCEPTCODE)
					foutput.puts("accept");
				else if(j1 == ERRCODE)
					foutput.puts("error");
				else
					foutput.puts("shift "+string j1);
			} else
				foutput.puts("reduce " + string -j1 + " (src line " + string rlines[-j1] + ")");
		}

	# output the final production
	if(lastred)
		foutput.puts("\n\t.  reduce " + string lastred + " (src line " + string rlines[lastred] + ")\n\n");
	else
		foutput.puts("\n\t.  error\n\n");

	# now, output nonterminal actions
	j1 = ntokens;
	for(j0 = 1; j0 <= nnonter; j0++) {
		j1++;
		if(temp1[j1])
			foutput.puts("\t" + symnam(j0+NTBASE) + "  goto " + string temp1[j1] + "\n");
	}
}

#
# output the gotos for the nontermninals
#
go2out()
{
	for(i := 1; i <= nnonter; i++) {
		go2gen(i);

		# find the best one to make default
		best := -1;
		times := 0;

		# is j the most frequent
		for(j := 0; j < nstate; j++) {
			if(tystate[j] == 0)
				continue;
			if(tystate[j] == best)
				continue;

			# is tystate[j] the most frequent
			count := 0;
			cbest := tystate[j];
			for(k := j; k < nstate; k++)
				if(tystate[k] == cbest)
					count++;
			if(count > times) {
				best = cbest;
				times = count;
			}
		}

		# best is now the default entry
		zzgobest += times-1;
		n := 0;
		for(j = 0; j < nstate; j++)
			if(tystate[j] != 0 && tystate[j] != best)
				n++;
		goent := array[2*n+1] of int;
		n = 0;
		for(j = 0; j < nstate; j++)
			if(tystate[j] != 0 && tystate[j] != best) {
				goent[n++] = j;
				goent[n++] = tystate[j];
				zzgoent++;
			}

		# now, the default
		if(best == -1)
			best = 0;
		zzgoent++;
		goent[n] = best;
		yypgo[i] = goent;
	}
}

#
# output the gotos for nonterminal c
#
go2gen(c: int)
{
	i, cc, p, q: int;

	# first, find nonterminals with gotos on c
	aryfil(temp1, nnonter+1, 0);
	temp1[c] = 1;
	work := 1;
	while(work) {
		work = 0;
		for(i=0; i<nprod; i++) {
			# cc is a nonterminal with a goto on c
			cc = prdptr[i][1]-NTBASE;
			if(cc >= 0 && temp1[cc] != 0) {
				# thus, the left side of production i does too
				cc = prdptr[i][0]-NTBASE;
				if(temp1[cc] == 0) {
					  work = 1;
					  temp1[cc] = 1;
				}
			}
		}
	}

	# now, we have temp1[c] = 1 if a goto on c in closure of cc
	if(g2debug && foutput != nil) {
		foutput.puts(nontrst[c].name);
		foutput.puts(": gotos on ");
		for(i=0; i<=nnonter; i++)
			if(temp1[i]){
				foutput.puts(nontrst[i].name);
				foutput.putc(' ');
			}
		foutput.putc('\n');
	}

	# now, go through and put gotos into tystate
	aryfil(tystate, nstate, 0);
	for(i=0; i<nstate; i++) {
		q = pstate[i+1];
		for(p=pstate[i]; p<q; p++) {
			if((cc = statemem[p].pitem.first) >= NTBASE) {
				# goto on c is possible
				if(temp1[cc-NTBASE]) {
					tystate[i] = amem[indgo[i]+c];
					break;
				}
			}
		}
	}
}

#
# in order to free up the mem and amem arrays for the optimizer,
# and still be able to output yyr1, etc., after the sizes of
# the action array is known, we hide the nonterminals
# derived by productions in levprd.
#
hideprod()
{
	j := 0;
	levprd[0] = 0;
	for(i:=1; i<nprod; i++) {
		if(!(levprd[i] & REDFLAG)) {
			j++;
			if(foutput != nil) {
				foutput.puts("Rule not reduced:   ");
				foutput.puts(writem(Pitem(prdptr[i], 0, 0, i)));
				foutput.putc('\n');
			}
		}
		levprd[i] = prdptr[i][0] - NTBASE;
	}
	if(j)
		print("%d rules never reduced\n", j);
}

callopt()
{
	j, k, p, q: int;
	v: array of int;

	pgo = array[nnonter+1] of int;
	pgo[0] = 0;
	maxoff = 0;
	maxspr = 0;
	for(i := 0; i < nstate; i++) {
		k = 32000;
		j = 0;
		v = optst[i];
		q = len v;
		for(p = 0; p < q; p += 2) {
			if(v[p] > j)
				j = v[p];
			if(v[p] < k)
				k = v[p];
		}
		# nontrivial situation
		if(k <= j) {
			# j is now the range
#			j -= k;			# call scj
			if(k > maxoff)
				maxoff = k;
		}
		tystate[i] = q + 2*j;
		if(j > maxspr)
			maxspr = j;
	}

	# initialize ggreed table
	ggreed = array[nnonter+1] of int;
	for(i = 1; i <= nnonter; i++) {
		ggreed[i] = 1;
		j = 0;

		# minimum entry index is always 0
		v = yypgo[i];
		q = len v - 1;
		for(p = 0; p < q ; p += 2) {
			ggreed[i] += 2;
			if(v[p] > j)
				j = v[p];
		}
		ggreed[i] = ggreed[i] + 2*j;
		if(j > maxoff)
			maxoff = j;
	}

	# now, prepare to put the shift actions into the amem array
	for(i = 0; i < ACTSIZE; i++)
		amem[i] = 0;
	maxa = 0;
	for(i = 0; i < nstate; i++) {
		if(tystate[i] == 0 && adb > 1)
			ftable.puts("State " + string i + ": null\n");
		indgo[i] = YYFLAG1;
	}
	while((i = nxti()) != NOMORE)
		if(i >= 0)
			stin(i);
		else
			gin(-i);

	# print amem array
	if(adb > 2)
		for(p = 0; p <= maxa; p += 10) {
			ftable.puts(string p + "  ");
			for(i = 0; i < 10; i++)
				ftable.puts(string amem[p+i] + "  ");
			ftable.putc('\n');
		}

	aoutput();
	osummary();
}

#
# finds the next i
#
nxti(): int
{
	max := 0;
	maxi := 0;
	for(i := 1; i <= nnonter; i++)
		if(ggreed[i] >= max) {
			max = ggreed[i];
			maxi = -i;
		}
	for(i = 0; i < nstate; i++)
		if(tystate[i] >= max) {
			max = tystate[i];
			maxi = i;
		}
	if(max == 0)
		return NOMORE;
	return maxi;
}

gin(i: int)
{
	s: int;

	# enter gotos on nonterminal i into array amem
	ggreed[i] = 0;

	q := yypgo[i];
	nq := len q - 1;
	# now, find amem place for it
nextgp:	for(p := 0; p < ACTSIZE; p++) {
		if(amem[p])
			continue;
		for(r := 0; r < nq; r += 2) {
			s = p + q[r] + 1;
			if(s > maxa){
				maxa = s;
				if(maxa >= ACTSIZE)
					error("a array overflow");
			}
			if(amem[s])
				continue nextgp;
		}
		# we have found amem spot
		amem[p] = q[nq];
		if(p > maxa)
			maxa = p;
		for(r = 0; r < nq; r += 2) {
			s = p + q[r] + 1;
			amem[s] = q[r+1];
		}
		pgo[i] = p;
		if(adb > 1)
			ftable.puts("Nonterminal " + string i + ", entry at " + string pgo[i] + "\n");
		return;
	}
	error("cannot place goto " + string i + "\n");
}

stin(i: int)
{
	s: int;

	tystate[i] = 0;

	# enter state i into the amem array
	q := optst[i];
	nq := len q;
	# find an acceptable place
nextn:	for(n := -maxoff; n < ACTSIZE; n++) {
		flag := 0;
		for(r := 0; r < nq; r += 2) {
			s = q[r] + n;
			if(s < 0 || s > ACTSIZE)
				continue nextn;
			if(amem[s] == 0)
				flag++;
			else if(amem[s] != q[r+1])
				continue nextn;
		}

		# check the position equals another only if the states are identical
		for(j:=0; j<nstate; j++) {
			if(indgo[j] == n) {

				# we have some disagreement
				if(flag)
					continue nextn;
				if(nq == len optst[j]) {

					# states are equal
					indgo[i] = n;
					if(adb > 1)
						ftable.puts("State " + string i + ": entry at "
							+ string n + " equals state " + string j + "\n");
					return;
				}

				# we have some disagreement
				continue nextn;
			}
		}

		for(r = 0; r < nq; r += 2) {
			s = q[r] + n;
			if(s > maxa)
				maxa = s;
			if(amem[s] != 0 && amem[s] != q[r+1])
				error("clobber of a array, pos'n " + string s + ", by " + string q[r+1] + "");
			amem[s] = q[r+1];
		}
		indgo[i] = n;
		if(adb > 1)
			ftable.puts("State " + string i + ": entry at " + string indgo[i] + "\n");
		return;
	}
	error("Error; failure to place state " + string i + "\n");
}

#
# this version is for limbo
# write out the optimized parser
#
aoutput()
{
	ftable.puts("YYLAST:\tcon "+string (maxa+1)+";\n");
	arout("yyact", amem, maxa+1);
	arout("yypact", indgo, nstate);
	arout("yypgo", pgo, nnonter+1);
}

#
# put out other arrays, copy the parsers
#
others()
{
	finput = bufio->open(parser, Bufio->OREAD);
	if(finput == nil)
		error("cannot find parser " + parser);
	arout("yyr1", levprd, nprod);
	aryfil(temp1, nprod, 0);

	#
	#yyr2 is the number of rules for each production
	#
	for(i:=1; i<nprod; i++)
		temp1[i] = len prdptr[i] - 2;
	arout("yyr2", temp1, nprod);

	aryfil(temp1, nstate, -1000);
	for(i=0; i<=ntokens; i++)
		for(j:=tstates[i]; j!=0; j=mstates[j])
			temp1[j] = i;
	for(i=0; i<=nnonter; i++)
		for(j=ntstates[i]; j!=0; j=mstates[j])
			temp1[j] = -i;
	arout("yychk", temp1, nstate);
	arout("yydef", defact, nstate);

	# put out token translation tables
	# table 1 has 0-256
	aryfil(temp1, 256, 0);
	c := 0;
	for(i=1; i<=ntokens; i++) {
		j = tokset[i].value;
		if(j >= 0 && j < 256) {
			if(temp1[j]) {
				print("yacc bug -- cant have 2 different Ts with same value\n");
				print("	%s and %s\n", tokset[i].name, tokset[temp1[j]].name);
				nerrors++;
			}
			temp1[j] = i;
			if(j > c)
				c = j;
		}
	}
	for(i = 0; i <= c; i++)
		if(temp1[i] == 0)
			temp1[i] = YYLEXUNK;
	arout("yytok1", temp1, c+1);

	# table 2 has PRIVATE-PRIVATE+256
	aryfil(temp1, 256, 0);
	c = 0;
	for(i=1; i<=ntokens; i++) {
		j = tokset[i].value - PRIVATE;
		if(j >= 0 && j < 256) {
			if(temp1[j]) {
				print("yacc bug -- cant have 2 different Ts with same value\n");
				print("	%s and %s\n", tokset[i].name, tokset[temp1[j]].name);
				nerrors++;
			}
			temp1[j] = i;
			if(j > c)
				c = j;
		}
	}
	arout("yytok2", temp1, c+1);

	# table 3 has everything else
	ftable.puts("yytok3 := array[] of {\n");
	c = 0;
	for(i=1; i<=ntokens; i++) {
		j = tokset[i].value;
		if(j >= 0 && j < 256)
			continue;
		if(j >= PRIVATE && j < 256+PRIVATE)
			continue;

		ftable.puts(sprint("%4d,%4d,", j, i));
		c++;
		if(c%5 == 0)
			ftable.putc('\n');
	}
	ftable.puts(sprint("%4d\n};\n", 0));

	# copy parser text
	while((c=finput.getc()) != Bufio->EOF) {
		if(c == '$') {
			if((c = finput.getc()) != 'A')
				ftable.putc('$');
			else { # copy actions
				if(codehead == nil)
					ftable.puts("* => ;");
				else
					dumpcode(-1);
				c = finput.getc();
			}
		}
		ftable.putc(c);
	}
	ftable.close();
}

arout(s: string, v: array of int, n: int)
{
	ftable.puts(s+" := array[] of {");
	for(i := 0; i < n; i++) {
		if(i%10 == 0)
			ftable.putc('\n');
		ftable.puts(sprint("%4d", v[i]));
		ftable.putc(',');
	}
	ftable.puts("\n};\n");
}

#
# output the summary on y.output
#
summary()
{
	if(foutput != nil) {
		foutput.puts("\n" + string ntokens + " terminals, " + string(nnonter + 1) + " nonterminals\n");
		foutput.puts("" + string nprod + " grammar rules, " + string nstate + "/" + string NSTATES + " states\n");
		foutput.puts("" + string zzsrconf + " shift/reduce, " + string zzrrconf + " reduce/reduce conflicts reported\n");
		foutput.puts("" + string len wsets + " working sets used\n");
		foutput.puts("memory: parser " + string memp + "/" + string ACTSIZE + "\n");
		foutput.puts(string (zzclose - 2*nstate) + " extra closures\n");
		foutput.puts(string zzacent + " shift entries, " + string zzexcp + " exceptions\n");
		foutput.puts(string zzgoent + " goto entries\n");
		foutput.puts(string zzgobest + " entries saved by goto default\n");
	}
	if(zzsrconf != 0 || zzrrconf != 0) {
		print("\nconflicts: ");
		if(zzsrconf)
			print("%d shift/reduce", zzsrconf);
		if(zzsrconf && zzrrconf)
			print(", ");
		if(zzrrconf)
			print("%d reduce/reduce", zzrrconf);
		print("\n");
	}
	if(fdefine != nil)
		fdefine.close();
}

#
# write optimizer summary
#
osummary()
{
	if(foutput == nil)
		return;
	i := 0;
	for(p := maxa; p >= 0; p--)
		if(amem[p] == 0)
			i++;

	foutput.puts("Optimizer space used: output " + string (maxa+1) + "/" + string ACTSIZE + "\n");
	foutput.puts(string(maxa+1) + " table entries, " + string i + " zero\n");
	foutput.puts("maximum spread: " + string maxspr + ", maximum offset: " + string maxoff + "\n");
}

#
# copies and protects "'s in q
#
chcopy(q: string): string
{
	s := "";
	j := 0;
	for(i := 0; i < len q; i++) {
		if(q[i] == '"') {
			s += q[j:i] + "\\";
			j = i;
		}
	}
	return s + q[j:i];
}

usage()
{
	fprint(stderr, "usage: yacc [-vd] [-Dn] [-o output] [-s stem] file\n");
	exit;
}

bitset(set: Lkset, bit: int): int
{
	return set[bit>>5] & (1<<(bit&31));
}

setbit(set: Lkset, bit: int): int
{
	return set[bit>>5] |= (1<<(bit&31));
}

mkset(): Lkset
{
	return array[tbitset] of {* => 0};
}

#
# set a to the union of a and b
# return 1 if b is not a subset of a, 0 otherwise
#
setunion(a, b: array of int): int
{
	sub := 0;
	for(i:=0; i<tbitset; i++) {
		x := a[i];
		y := x | b[i];
		a[i] = y;
		if(y != x)
			sub = 1;
	}
	return sub;
}

prlook(p: Lkset)
{
	if(p == nil){
		foutput.puts("\tNULL");
		return;
	}
	foutput.puts(" { ");
	for(j:=0; j<=ntokens; j++){
		if(bitset(p, j)){
			foutput.puts(symnam(j));
			foutput.putc(' ');
		}
	}
	foutput.putc('}');
}

#
# utility routines
#
isdigit(c: int): int
{
	return c >= '0' && c <= '9';
}

isword(c: int): int
{
	return c >= 16ra0 || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
}

mktemp(t: string): string
{
	return t;
}

#
# arg processing
#
Arg.init(argv: list of string): ref Arg
{
	if(argv != nil)
		argv = tl argv;
	return ref Arg(argv, 0, "");
}

Arg.opt(arg: self ref Arg): int
{
	opts := arg.opts;
	if(opts != ""){
		arg.c = opts[0];
		arg.opts = opts[1:];
		return arg.c;
	}
	argv := arg.argv;
	if(argv == nil)
		return arg.c = 0;
	opts = hd argv;
	if(len opts < 2 || opts[0] != '-')
		return arg.c = 0;
	arg.argv = tl argv;
	if(opts == "--")
		return arg.c = 0;
	arg.opts = opts[2:];
	return arg.c = opts[1];
}

Arg.arg(arg: self ref Arg): string
{
	s := arg.opts;
	arg.opts = "";
	if(s != "")
		return s;
	argv := arg.argv;
	if(argv == nil)
		return "";
	arg.argv = tl argv;
	return hd argv;
}