shithub: rgbds

Download patch

ref: 2bf31870a7b9be374e24c20aaac13985b5e33f67
parent: 6198cc185c24ec4ba794ed27c969ae65d31f89c3
author: YamaArashi <[email protected]>
date: Fri Aug 22 17:44:18 EDT 2014

Cleaned up lexer
- separated the lexer into multiple functions so it is more readable
- fixed issue with long label names in macro arguments
- added error checking code to prevent buffer overflows

--- a/include/asm/lexer.h
+++ b/include/asm/lexer.h
@@ -5,7 +5,8 @@
 
 #include "asm/types.h"
 
-#define LEXHASHSIZE	512
+#define LEXHASHSIZE (1 << 11)
+#define MAXSTRLEN 255
 
 struct sLexInitString {
 	char *tzName;
@@ -18,7 +19,9 @@
 };
 
 struct yy_buffer_state {
-	char *pBufferStart;
+	char *pBufferRealStart; // actual starting address
+	char *pBufferStart; // address where the data is initially written
+	                    // after the "safety margin"
 	char *pBuffer;
 	ULONG nBufferSize;
 	ULONG oAtLineStart;
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -18,18 +18,18 @@
 	ULONG nNameLength;
 	struct sLexString *pNext;
 };
-#define pLexBuffer		(pCurrentBuffer->pBuffer)
-#define nLexBufferLeng	(pCurrentBuffer->nBufferSize)
+#define pLexBufferRealStart (pCurrentBuffer->pBufferRealStart)
+#define pLexBuffer		    (pCurrentBuffer->pBuffer)
+#define AtLineStart	        (pCurrentBuffer->oAtLineStart)
 
 #define SAFETYMARGIN	1024
 
-extern ULONG symvaluetostring(char *dest, char *s);
+extern size_t symvaluetostring(char *dest, size_t maxLength, char *sym);
 
 struct sLexFloat tLexFloat[32];
 struct sLexString *tLexHash[LEXHASHSIZE];
 YY_BUFFER_STATE pCurrentBuffer;
-ULONG yyleng;
-ULONG nLexMaxLeng;
+ULONG nLexMaxLength; // max length of all keywords and operators
 
 ULONG tFloatingSecondChar[256];
 ULONG tFloatingFirstChar[256];
@@ -37,8 +37,6 @@
 ULONG nFloating;
 enum eLexerState lexerstate = LEX_STATE_NORMAL;
 
-#define AtLineStart	pCurrentBuffer->oAtLineStart
-
 #ifdef __GNUC__
 void 
 strupr(char *s)
@@ -73,6 +71,9 @@
 void 
 yyunput(char c)
 {
+	if (pLexBuffer <= pLexBufferRealStart) // read pointer is already at the real start of the allocation
+		fatalerror("Buffer safety margin exceeded");
+	// Otherwise step the read pointer back one byte and store c there.
 	*(--pLexBuffer) = c;
 }
 
@@ -79,12 +80,15 @@
 void 
 yyunputstr(char *s)
 {
-	SLONG i;
+	int i, len; // len: number of bytes of s that get pushed back in front of the read pointer
 
-	i = strlen(s) - 1;
+	len = strlen(s);
 
-	while (i >= 0)
-		yyunput(s[i--]);
+	if (pLexBuffer - pLexBufferRealStart < len) // compare distances; never form a pointer below the allocation
+		fatalerror("Buffer safety margin exceeded");
+	// Copy back to front so the pushed-back bytes end up in their original order.
+	for (i = len - 1; i >= 0; i--)
+		*(--pLexBuffer) = s[i];
 }
 
 void 
@@ -114,10 +118,10 @@
 	if ((pBuffer =
 		(YY_BUFFER_STATE) malloc(sizeof(struct yy_buffer_state))) !=
 	    NULL) {
-		if ((pBuffer->pBuffer = pBuffer->pBufferStart =
+		if ((pBuffer->pBufferRealStart =
 			(char *) malloc(size + 1 + SAFETYMARGIN)) != NULL) {
-			pBuffer->pBuffer += SAFETYMARGIN;
-			pBuffer->pBufferStart += SAFETYMARGIN;
+			pBuffer->pBufferStart = pBuffer->pBufferRealStart + SAFETYMARGIN;
+			pBuffer->pBuffer = pBuffer->pBufferRealStart + SAFETYMARGIN;
 			memcpy(pBuffer->pBuffer, mem, size);
 			pBuffer->nBufferSize = size;
 			pBuffer->oAtLineStart = 1;
@@ -135,8 +139,7 @@
 	YY_BUFFER_STATE pBuffer;
 
 	if ((pBuffer =
-		(YY_BUFFER_STATE) malloc(sizeof(struct yy_buffer_state))) !=
-	    NULL) {
+		(YY_BUFFER_STATE) malloc(sizeof(struct yy_buffer_state))) != NULL) {
 		ULONG size;
 
 		fseek(f, 0, SEEK_END);
@@ -143,13 +146,13 @@
 		size = ftell(f);
 		fseek(f, 0, SEEK_SET);
 
-		if ((pBuffer->pBuffer = pBuffer->pBufferStart =
+		if ((pBuffer->pBufferRealStart =
 			(char *) malloc(size + 2 + SAFETYMARGIN)) != NULL) {
 			char *mem;
 			ULONG instring = 0;
 
-			pBuffer->pBuffer += SAFETYMARGIN;
-			pBuffer->pBufferStart += SAFETYMARGIN;
+			pBuffer->pBufferStart = pBuffer->pBufferRealStart + SAFETYMARGIN;
+			pBuffer->pBuffer = pBuffer->pBufferRealStart + SAFETYMARGIN;
 
 			size = fread(pBuffer->pBuffer, sizeof(UBYTE), size, f);
 
@@ -167,7 +170,7 @@
 					mem += 1;
 				} else {
 					if ((mem[0] == 10 && mem[1] == 13)
-					    || (mem[0] == 13 && mem[1] == 10)) {
+					 || (mem[0] == 13 && mem[1] == 10)) {
 						mem[0] = ' ';
 						mem[1] = '\n';
 						mem += 2;
@@ -174,17 +177,12 @@
 					} else if (mem[0] == 10 || mem[0] == 13) {
 						mem[0] = '\n';
 						mem += 1;
-					} else if (mem[0] == '\n'
-					    && mem[1] == '*') {
+					} else if (mem[0] == '\n' && mem[1] == '*') {
 						mem += 1;
-						while (!
-						    (*mem == '\n'
-							|| *mem == '\0'))
+						while (!(*mem == '\n' || *mem == '\0'))
 							*mem++ = ' ';
 					} else if (*mem == ';') {
-						while (!
-						    (*mem == '\n'
-							|| *mem == '\0'))
+						while (!(*mem == '\n' || *mem == '\0'))
 							*mem++ = ' ';
 					} else
 						mem += 1;
@@ -199,17 +197,33 @@
 	return (NULL);
 }
 
-ULONG 
-lex_FloatAlloc(struct sLexFloat * tok)
+ULONG
+lex_FloatAlloc(struct sLexFloat *token)
 {
-	tLexFloat[nFloating] = (*tok);
+	tLexFloat[nFloating] = *token; // copy *token into slot nFloating; the mask returned below has only that slot's bit set. NOTE(review): nFloating is not range-checked here
 
 	return (1 << (nFloating++));
 }
 
+/*
+ * Validates a character range: start must not exceed end and both must lie
+ * within 1..127. An invalid range is reported on stderr and the program exits.
+ */
+void
+lex_CheckCharacterRange(UWORD start, UWORD end)
+{
+	if (start >= 1 && start <= end && end <= 127)
+		return;
+	fprintf(stderr, "Invalid character range (start: %u, end: %u)\n",
+	        start, end);
+	exit(1);
+}
+
 void 
 lex_FloatDeleteRange(ULONG id, UWORD start, UWORD end)
 {
+	lex_CheckCharacterRange(start, end);
+
 	while (start <= end) {
 		tFloatingChars[start] &= ~id;
 		start += 1;
@@ -219,6 +233,8 @@
 void 
 lex_FloatAddRange(ULONG id, UWORD start, UWORD end)
 {
+	lex_CheckCharacterRange(start, end);
+
 	while (start <= end) {
 		tFloatingChars[start] |= id;
 		start += 1;
@@ -228,6 +244,8 @@
 void 
 lex_FloatDeleteFirstRange(ULONG id, UWORD start, UWORD end)
 {
+	lex_CheckCharacterRange(start, end);
+
 	while (start <= end) {
 		tFloatingFirstChar[start] &= ~id;
 		start += 1;
@@ -237,6 +255,8 @@
 void 
 lex_FloatAddFirstRange(ULONG id, UWORD start, UWORD end)
 {
+	lex_CheckCharacterRange(start, end);
+
 	while (start <= end) {
 		tFloatingFirstChar[start] |= id;
 		start += 1;
@@ -246,6 +266,8 @@
 void 
 lex_FloatDeleteSecondRange(ULONG id, UWORD start, UWORD end)
 {
+	lex_CheckCharacterRange(start, end);
+
 	while (start <= end) {
 		tFloatingSecondChar[start] &= ~id;
 		start += 1;
@@ -255,6 +277,8 @@
 void 
 lex_FloatAddSecondRange(ULONG id, UWORD start, UWORD end)
 {
+	lex_CheckCharacterRange(start, end);
+
 	while (start <= end) {
 		tFloatingSecondChar[start] |= id;
 		start += 1;
@@ -262,32 +286,32 @@
 }
 
 struct sLexFloat *
-lexgetfloat(ULONG id)
+lexgetfloat(ULONG nFloatMask) // returns the tLexFloat entry selected by the lowest set bit of nFloatMask
 {
-	ULONG r = 0, mask = 1;
+	if (nFloatMask == 0) { // with no bit set the scan below would never find one
+		fatalerror("Internal error in lexgetfloat");
+	}
 
-	if (id == 0)
-		return (NULL);
+	int i = 0; // number of right shifts done so far, i.e. the index of the bit now at position 0
 
-	while ((id & mask) == 0) {
-		mask <<= 1;
-		r += 1;
+	while ((nFloatMask & 1) == 0) {
+		nFloatMask >>= 1;
+		i++;
 	}
 
-	return (&tLexFloat[r]);
+	return (&tLexFloat[i]);
 }
 
 ULONG 
 lexcalchash(char *s)
 {
-	ULONG r = 0;
+	ULONG hash = 0; // case-insensitive hash of s; reduced modulo LEXHASHSIZE on return
 
 	while (*s) {
-		r = ((r << 1) + (toupper(*s))) % LEXHASHSIZE;
-		s += 1;
+		hash = (hash * 283) ^ toupper((unsigned char)*s++); // toupper() is only defined for unsigned char values and EOF
 	}
 
-	return (r);
+	return (hash % LEXHASHSIZE);
 }
 
 void 
@@ -295,17 +319,17 @@
 {
 	ULONG i;
 
-	for (i = 0; i < LEXHASHSIZE; i += 1) {
+	for (i = 0; i < LEXHASHSIZE; i++) {
 		tLexHash[i] = NULL;
 	}
 
-	for (i = 0; i < 256; i += 1) {
+	for (i = 0; i < 256; i++) {
 		tFloatingFirstChar[i] = 0;
 		tFloatingSecondChar[i] = 0;
 		tFloatingChars[i] = 0;
 	}
 
-	nLexMaxLeng = 0;
+	nLexMaxLength = 0;
 	nFloating = 0;
 }
 
@@ -333,8 +357,8 @@
 
 				strupr((*ppHash)->tzName);
 
-				if ((*ppHash)->nNameLength > nLexMaxLeng)
-					nLexMaxLeng = (*ppHash)->nNameLength;
+				if ((*ppHash)->nNameLength > nLexMaxLength)
+					nLexMaxLength = (*ppHash)->nNameLength;
 
 			} else
 				fatalerror("Out of memory!");
@@ -345,458 +369,391 @@
 	}
 }
 
-ULONG 
-yylex(void)
+/*
+ * Gets the "float" mask and "float" length.
+ * "Float" refers to the token type of a token that is not a keyword.
+ * The character classes floatingFirstChar, floatingSecondChar, and
+ * floatingChars are defined separately for each token type.
+ * It uses bit masks to match against a set of simple regular expressions
+ * of the form /[floatingFirstChar]([floatingSecondChar][floatingChars]*)?/.
+ * The token types with the longest match from the current position in the
+ * buffer will have their bits set in the float mask.
+ */
+void
+yylex_GetFloatMaskAndFloatLen(ULONG *pnFloatMask, ULONG *pnFloatLen)
 {
-	ULONG hash, maxlen;
-	char *s;
-	struct sLexString *pLongestFixed = NULL;
-	ULONG nFloatMask, nOldFloatMask, nFloatLen;
-	ULONG linestart = AtLineStart;
+	// Note that '\0' should always have a bit mask of 0 in the "floating"
+	// tables, so it doesn't need to be checked for separately.
 
-	switch (lexerstate) {
-	case LEX_STATE_NORMAL:
-		AtLineStart = 0;
+	char *s = pLexBuffer;
+	ULONG nOldFloatMask = 0; // mask before the most recent narrowing; this is what gets reported
+	ULONG nFloatMask = tFloatingFirstChar[(unsigned char)*s]; // index as unsigned char: bytes >= 0x80 must not become negative indices
 
-scanagain:
+	if (nFloatMask != 0) {
+		s++;
+		nOldFloatMask = nFloatMask;
+		nFloatMask &= tFloatingSecondChar[(unsigned char)*s];
 
-		while (*pLexBuffer == ' ' || *pLexBuffer == '\t') {
-			linestart = 0;
-			pLexBuffer += 1;
+		while (nFloatMask != 0) {
+			s++;
+			nOldFloatMask = nFloatMask;
+			nFloatMask &= tFloatingChars[(unsigned char)*s];
 		}
+	}
 
-		if (*pLexBuffer == 0) {
-			if (yywrap() == 0) {
-				linestart = AtLineStart;
-				AtLineStart = 0;
-				goto scanagain;
+	*pnFloatMask = nOldFloatMask;
+	*pnFloatLen = (ULONG)(s - pLexBuffer);
+}
+
+/*
+ * Gets the longest keyword/operator from the current position in the buffer.
+ */
+struct sLexString *
+yylex_GetLongestFixed()
+{
+	struct sLexString *pLongestFixed = NULL;
+	char *s = pLexBuffer;
+	ULONG hash = 0;
+	ULONG length = 0;
+	// Extend the candidate one character at a time, up to the longest registered name.
+	while (length < nLexMaxLength && *s) {
+		hash = (hash * 283) ^ toupper((unsigned char)*s); // toupper() is only defined for unsigned char values and EOF
+		s++;
+		length++;
+		// Look through the bucket for a name of exactly this length that matches case-insensitively.
+		struct sLexString *lex = tLexHash[hash % LEXHASHSIZE];
+
+		while (lex) {
+			if (lex->nNameLength == length
+			 && strncasecmp(pLexBuffer, lex->tzName, length) == 0) {
+				pLongestFixed = lex;
+				break;
 			}
+			lex = lex->pNext;
 		}
-		s = pLexBuffer;
-		nOldFloatMask = nFloatLen = 0;
-		nFloatMask = tFloatingFirstChar[(int) *s++];
-		while (nFloatMask && nFloatLen < nLexBufferLeng) {
-			nFloatLen += 1;
-			nOldFloatMask = nFloatMask;
-			if (nFloatLen == 1)
-				nFloatMask &= tFloatingSecondChar[(int) *s++];
-			else
-				nFloatMask &= tFloatingChars[(int) *s++];
+	}
+
+	return pLongestFixed;
+}
+
+size_t
+CopyMacroArg(char *dest, size_t maxLength, char c)
+{
+	size_t i; // unsigned, so the bound check against maxLength and the returned length need no sign conversion
+	char *s;
+	int argNum;
+	// Copies the macro argument selected by c ('0'..'9' or '@') into dest and returns the number of chars copied.
+	switch (c) {
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+		argNum = c - '0';
+		break;
+	case '@':
+		argNum = -1;
+		break;
+	default:
+		return 0; // c does not name a macro argument (an empty argument also yields 0 from the copy loop)
+	}
+
+	if ((s = sym_FindMacroArg(argNum)) == NULL)
+		fatalerror("Macro argument not defined");
+	// Copy without a terminating 0; at most maxLength chars may be written to dest.
+	for (i = 0; s[i] != 0; i++) {
+		if (i >= maxLength) {
+			fatalerror("Macro argument too long to fit buffer");
 		}
+		dest[i] = s[i];
+	}
 
-		maxlen = nLexBufferLeng;
-		if (nLexMaxLeng < maxlen)
-			maxlen = nLexMaxLeng;
+	return i;
+}
 
-		yyleng = 0;
-		hash = 0;
-		s = pLexBuffer;
-		while (yyleng < nLexMaxLeng) {
-			/* XXX: Kludge warning! The dereference of s below
-			 * may go beyond the end of the buffer. We use the
-			 * following test to stop that from happening,
-			 * without really understanding what the rest of
-			 * the code is doing. This may not be the correct
-			 * fix! */
-			if (!*s)
+static inline void
+yylex_StringWriteChar(char *s, size_t index, char c) // bounds-checked s[index] = c for string buffers
+{
+	if (index >= MAXSTRLEN) { // only indices 0 .. MAXSTRLEN - 1 are accepted
+		fatalerror("String too long");
+	}
+
+	s[index] = c;
+}
+
+static inline void
+yylex_SymbolWriteChar(char *s, size_t index, char c) // bounds-checked s[index] = c for symbol-name buffers
+{
+	if (index >= MAXSYMLEN) { // only indices 0 .. MAXSYMLEN - 1 are accepted
+		fatalerror("Symbol too long");
+	}
+
+	s[index] = c;
+}
+
+/*
+ * Strips trailing spaces and tabs from s by overwriting them with 0.
+ * index is the position of the terminating 0 of s.
+ */
+void yylex_TrimEnd(char *s, size_t index)
+{
+	int last = (int)index - 1;
+
+	while (last >= 0 && (s[last] == ' ' || s[last] == '\t'))
+		s[last--] = 0;
+}
+
+size_t
+yylex_ReadBracketedSymbol(char *dest, size_t index)
+{
+	char sym[MAXSYMLEN + 1]; // symbol name collected from the buffer, with macro arguments expanded
+	char ch;
+	size_t i = 0; // number of chars stored in sym so far
+	size_t length, maxLength;
+	// Collect the name up to '}', '"' or end of line; pLexBuffer is left on the stopping character.
+	for (ch = *pLexBuffer;
+	     ch != '}' && ch != '"' && ch != '\n';
+		 ch = *(++pLexBuffer)) {
+		if (ch == '\\') {
+			ch = *(++pLexBuffer);
+			maxLength = MAXSYMLEN - i;
+			length = CopyMacroArg(&sym[i], maxLength, ch);
+			// A zero length is treated as an invalid escape.
+			if (length != 0)
+				i += length;
+			else
+				fatalerror("Illegal character escape '%c'", ch);
+		} else
+			yylex_SymbolWriteChar(sym, i++, ch);
+	}
+
+	yylex_SymbolWriteChar(sym, i, 0);
+	// Write the symbol's value at dest[index]; this function itself stores no terminator in dest.
+	maxLength = MAXSTRLEN - index; // it's assumed we're writing to a T_STRING
+	length = symvaluetostring(&dest[index], maxLength, sym);
+	// Consume the closing brace if present; otherwise report the error and leave the buffer position as is.
+	if (*pLexBuffer == '}')
+		pLexBuffer++;
+	else
+		yyerror("Missing }");
+
+	return length;
+}
+
+void
+yylex_ReadQuotedString()
+{
+	size_t index = 0;
+	size_t length, maxLength;
+
+	while (*pLexBuffer != '"' && *pLexBuffer != '\n') {
+		char ch = *pLexBuffer++;
+
+		if (ch == '\\') {
+			ch = *pLexBuffer++;
+
+			switch (ch) {
+			case 'n':
+				ch = '\n';
 				break;
+			case 't':
+				ch = '\t';
+				break;
+			case '\\':
+				ch = '\\';
+				break;
+			case '"':
+				ch = '"';
+				break;
+			default:
+				maxLength = MAXSTRLEN - index;
+				length = CopyMacroArg(&yylval.tzString[index], maxLength, ch);
 
-			yyleng += 1;
-			hash = ((hash << 1) + (toupper(*s))) % LEXHASHSIZE;
-			s += 1;
-			if (tLexHash[hash]) {
-				struct sLexString *lex;
+				if (length != 0)
+					index += length;
+				else
+					fatalerror("Illegal character escape '%c'", ch);
 
-				lex = tLexHash[hash];
-				while (lex) {
-					if (lex->nNameLength == yyleng) {
-						if (strncasecmp
-						    (pLexBuffer, lex->tzName,
-							yyleng) == 0) {
-							pLongestFixed = lex;
-						}
-					}
-					lex = lex->pNext;
-				}
+				ch = 0;
+				break;
 			}
+		} else if (ch == '{') {
+			// Get bracketed symbol within string.
+			index += yylex_ReadBracketedSymbol(yylval.tzString, index);
+			ch = 0;
 		}
 
-		if (nFloatLen == 0 && pLongestFixed == NULL) {
-			if (*pLexBuffer == '"') {
-				ULONG index = 0;
+		if (ch)
+			yylex_StringWriteChar(yylval.tzString, index++, ch);
+	}
 
-				pLexBuffer += 1;
-				while ((*pLexBuffer != '"')
-				    && (*pLexBuffer != '\n')) {
-					char ch, *marg;
+	yylex_StringWriteChar(yylval.tzString, index, 0);
 
-					if ((ch = *pLexBuffer++) == '\\') {
-						switch (ch = (*pLexBuffer++)) {
-						case 'n':
-							ch = '\n';
-							break;
-						case 't':
-							ch = '\t';
-							break;
-						case '0':
-						case '1':
-						case '2':
-						case '3':
-						case '4':
-						case '5':
-						case '6':
-						case '7':
-						case '8':
-						case '9':
-							if ((marg =
-								sym_FindMacroArg(ch
-								    -
-								    '0'))
-							    != NULL) {
-								while (*marg)
-									yylval.
-									    tzString
-									    [index++]
-									    =
-									    *marg++;
-								ch = 0;
-							}
-							break;
-						case '@':
-							if ((marg =
-								sym_FindMacroArg
-								(-1)) != NULL) {
-								while (*marg)
-									yylval.
-									    tzString
-									    [index++]
-									    =
-									    *marg++;
-								ch = 0;
-							}
-							break;
-						}
-					} else if (ch == '{') {
-						char sym[MAXSYMLEN];
-						int i = 0;
+	if (*pLexBuffer == '"')
+		pLexBuffer++;
+	else
+		yyerror("Unterminated string");
+}
 
-						while ((*pLexBuffer != '}')
-						    && (*pLexBuffer != '"')
-						    && (*pLexBuffer !=
-							'\n')) {
-							if ((ch =
-								*pLexBuffer++) ==
-							    '\\') {
-								switch (ch =
-								    (*pLexBuffer++)) {
-								case '0':
-								case '1':
-								case '2':
-								case '3':
-								case '4':
-								case '5':
-								case '6':
-								case '7':
-								case '8':
-								case '9':
-									if ((marg = sym_FindMacroArg(ch - '0')) != NULL) {
-										while
-										    (*marg)
-											sym[i++] = *marg++;
-										ch = 0;
-									}
-									break;
-								case '@':
-									if ((marg = sym_FindMacroArg(-1)) != NULL) {
-										while
-										    (*marg)
-											sym[i++] = *marg++;
-										ch = 0;
-									}
-									break;
-								}
-							} else
-								sym[i++] = ch;
-						}
+ULONG
+yylex_NORMAL()
+{
+	struct sLexString *pLongestFixed = NULL;
+	ULONG nFloatMask, nFloatLen;
+	ULONG linestart = AtLineStart;
 
-						sym[i] = 0;
-						index +=
-						    symvaluetostring(&yylval.
-						    tzString
-						    [index],
-						    sym);
-						if (*pLexBuffer == '}')
-							pLexBuffer += 1;
-						else
-							yyerror("Missing }");
-						ch = 0;
-					}
-					if (ch)
-						yylval.tzString[index++] = ch;
-				}
+	AtLineStart = 0;
 
-				yylval.tzString[index++] = 0;
+scanagain:
+	while (*pLexBuffer == ' ' || *pLexBuffer == '\t') {
+		linestart = 0;
+		pLexBuffer++;
+	}
 
-				if (*pLexBuffer == '\n')
-					yyerror("Unterminated string");
-				else
-					pLexBuffer += 1;
+	if (*pLexBuffer == 0) {
+		// Reached the end of a file, macro, or rept.
+		if (yywrap() == 0) {
+			linestart = AtLineStart;
+			AtLineStart = 0;
+			goto scanagain;
+		}
+	}
 
-				return (T_STRING);
-			} else if (*pLexBuffer == '{') {
-				char sym[MAXSYMLEN], ch, *marg;
-				int i = 0;
+	// Try to match an identifier, macro argument (e.g. \1),
+	// or numeric literal.
+	yylex_GetFloatMaskAndFloatLen(&nFloatMask, &nFloatLen);
 
-				pLexBuffer += 1;
+	// Try to match a keyword or operator.
+	pLongestFixed = yylex_GetLongestFixed();
 
-				while ((*pLexBuffer != '}')
-				    && (*pLexBuffer != '\n')) {
-					if ((ch = *pLexBuffer++) == '\\') {
-						switch (ch = (*pLexBuffer++)) {
-						case '0':
-						case '1':
-						case '2':
-						case '3':
-						case '4':
-						case '5':
-						case '6':
-						case '7':
-						case '8':
-						case '9':
-							if ((marg =
-								sym_FindMacroArg(ch
-								    -
-								    '0'))
-							    != NULL) {
-								while (*marg)
-									sym[i++]
-									    =
-									    *marg++;
-								ch = 0;
-							}
-							break;
-						case '@':
-							if ((marg =
-								sym_FindMacroArg
-								(-1)) != NULL) {
-								while (*marg)
-									sym[i++]
-									    =
-									    *marg++;
-								ch = 0;
-							}
-							break;
-						}
-					} else
-						sym[i++] = ch;
-				}
-				sym[i] = 0;
-				symvaluetostring(yylval.tzString, sym);
-				if (*pLexBuffer == '}')
-					pLexBuffer += 1;
-				else
-					yyerror("Missing }");
+	if (nFloatLen == 0 && pLongestFixed == NULL) {
+		// No keyword, identifier, operator, or numerical literal matches.
 
-				return (T_STRING);
-			} else {
-				if (*pLexBuffer == '\n')
-					AtLineStart = 1;
+		if (*pLexBuffer == '"') {
+			pLexBuffer++;
+			yylex_ReadQuotedString();
+			return T_STRING;
+		} else if (*pLexBuffer == '{') {
+			pLexBuffer++;
+			yylex_ReadBracketedSymbol(yylval.tzString, 0);
+			return T_STRING;
+		} else {
+			// It's not a keyword, operator, identifier, macro argument,
+			// numeric literal, string, or bracketed symbol, so just return
+			// the ASCII character.
+			if (*pLexBuffer == '\n')
+				AtLineStart = 1;
 
-				yyleng = 1;
-				return (*pLexBuffer++);
-			}
+			return *pLexBuffer++;
 		}
-		if (nFloatLen == 0) {
-			yyleng = pLongestFixed->nNameLength;
-			pLexBuffer += yyleng;
-			return (pLongestFixed->nToken);
-		}
-		if (pLongestFixed == NULL) {
-			struct sLexFloat *tok;
+	}
 
-			tok = lexgetfloat(nOldFloatMask);
-			yyleng = nFloatLen;
-			if (tok->Callback) {
-				if (tok->Callback(pLexBuffer, yyleng) == 0)
-					goto scanagain;
-			}
-			if (tok->nToken == T_ID && linestart) {
-				pLexBuffer += yyleng;
-				return (T_LABEL);
-			} else {
-				pLexBuffer += yyleng;
-				return (tok->nToken);
-			}
+	if (pLongestFixed == NULL || nFloatLen > pLongestFixed->nNameLength) {
+		// Longest match was an identifier, macro argument, or numeric literal.
+		struct sLexFloat *token = lexgetfloat(nFloatMask);
+
+		if (token->Callback) {
+			int done = token->Callback(pLexBuffer, nFloatLen);
+			if (!done)
+				goto scanagain;
 		}
-		if (nFloatLen > pLongestFixed->nNameLength) {
-			struct sLexFloat *tok;
 
-			tok = lexgetfloat(nOldFloatMask);
-			yyleng = nFloatLen;
-			if (tok->Callback) {
-				if (tok->Callback(pLexBuffer, yyleng) == 0)
-					goto scanagain;
-			}
-			if (tok->nToken == T_ID && linestart) {
-				pLexBuffer += yyleng;
-				return (T_LABEL);
-			} else {
-				pLexBuffer += yyleng;
-				return (tok->nToken);
-			}
+		pLexBuffer += nFloatLen;
+
+		if (token->nToken == T_ID && linestart) {
+			return T_LABEL;
 		} else {
-			yyleng = pLongestFixed->nNameLength;
-			pLexBuffer += yyleng;
-			return (pLongestFixed->nToken);
+			return token->nToken;
 		}
-		break;
+	}
 
-	case LEX_STATE_MACROARGS:
-		{
-			ULONG index = 0;
+	// Longest match was a keyword or operator.
+	pLexBuffer += pLongestFixed->nNameLength;
+	return pLongestFixed->nToken;
+}
 
-			while (*pLexBuffer == ' ' || *pLexBuffer == '\t') {
-				linestart = 0;
-				pLexBuffer += 1;
-			}
+ULONG
+yylex_MACROARGS()
+{
+	size_t index = 0;
+	size_t length, maxLength;
 
-			while ((*pLexBuffer != ',')
-			    && (*pLexBuffer != '\n')) {
-				char ch, *marg;
+	while (*pLexBuffer == ' ' || *pLexBuffer == '\t') {
+		pLexBuffer++;
+	}
 
-				if ((ch = *pLexBuffer++) == '\\') {
-					switch (ch = (*pLexBuffer++)) {
-					case 'n':
-						ch = '\n';
-						break;
-					case 't':
-						ch = '\t';
-						break;
-					case '0':
-					case '1':
-					case '2':
-					case '3':
-					case '4':
-					case '5':
-					case '6':
-					case '7':
-					case '8':
-					case '9':
-						if ((marg =
-							sym_FindMacroArg(ch -
-							    '0')) !=
-						    NULL) {
-							while (*marg)
-								yylval.
-								    tzString
-								    [index++] =
-								    *marg++;
-							ch = 0;
-						}
-						break;
-					case '@':
-						if ((marg =
-							sym_FindMacroArg(-1)) !=
-						    NULL) {
-							while (*marg)
-								yylval.
-								    tzString
-								    [index++] =
-								    *marg++;
-							ch = 0;
-						}
-						break;
-					}
-				} else if (ch == '{') {
-					char sym[MAXSYMLEN];
-					int i = 0;
+	while (*pLexBuffer != ',' && (*pLexBuffer != '\n')) {
+		char ch = *pLexBuffer++;
 
-					while ((*pLexBuffer != '}')
-					    && (*pLexBuffer != '"')
-					    && (*pLexBuffer != '\n')) {
-						if ((ch =
-							*pLexBuffer++) == '\\') {
-							switch (ch =
-							    (*pLexBuffer++)) {
-							case '0':
-							case '1':
-							case '2':
-							case '3':
-							case '4':
-							case '5':
-							case '6':
-							case '7':
-							case '8':
-							case '9':
-								if ((marg =
-									sym_FindMacroArg
-									(ch -
-									    '0')) !=
-								    NULL) {
-									while
-									    (*marg)
-										sym[i++] = *marg++;
-									ch = 0;
-								}
-								break;
-							case '@':
-								if ((marg =
-									sym_FindMacroArg
-									(-1)) !=
-								    NULL) {
-									while
-									    (*marg)
-										sym[i++] = *marg++;
-									ch = 0;
-								}
-								break;
-							}
-						} else
-							sym[i++] = ch;
-					}
-					sym[i] = 0;
-					index +=
-					    symvaluetostring(&yylval.
-					    tzString[index],
-					    sym);
-					if (*pLexBuffer == '}')
-						pLexBuffer += 1;
-					else
-						yyerror("Missing }");
-					ch = 0;
-				}
-				if (ch)
-					yylval.tzString[index++] = ch;
-			}
+		if (ch == '\\') {
+			ch = *pLexBuffer++;
 
-			if (index) {
-				yyleng = index;
-				yylval.tzString[index] = 0;
-				if (*pLexBuffer == '\n') {
-					while (yylval.tzString[--index] == ' ') {
-						yylval.tzString[index] = 0;
-						yyleng -= 1;
-					}
-				}
-				return (T_STRING);
-			} else if (*pLexBuffer == '\n') {
-				pLexBuffer += 1;
-				AtLineStart = 1;
-				yyleng = 1;
-				return ('\n');
-			} else if (*pLexBuffer == ',') {
-				pLexBuffer += 1;
-				yyleng = 1;
-				return (',');
-			} else {
-				yyerror("INTERNAL ERROR IN YYLEX");
-				return (0);
+			switch (ch) {
+			case 'n':
+				ch = '\n';
+				break;
+			case 't':
+				ch = '\t';
+				break;
+			case '\\':
+				ch = '\\';
+				break;
+			default:
+				maxLength = MAXSTRLEN - index;
+				length = CopyMacroArg(&yylval.tzString[index], maxLength, ch);
+
+				if (length != 0)
+					index += length;
+				else
+					fatalerror("Illegal character escape '%c'", ch);
+				
+				ch = 0;
+				break;
 			}
+		} else if (ch == '{') {
+			index += yylex_ReadBracketedSymbol(yylval.tzString, index);
+			ch = 0;
 		}
+		if (ch)
+			yylex_StringWriteChar(yylval.tzString, index++, ch);
+	}
 
-		break;
+	if (index) {
+		yylex_StringWriteChar(yylval.tzString, index, 0);
+
+		// trim trailing white space at the end of the line
+		if (*pLexBuffer == '\n')
+			yylex_TrimEnd(yylval.tzString, index);
+
+		return T_STRING;
+	} else if (*pLexBuffer == '\n') {
+		pLexBuffer++;
+		AtLineStart = 1;
+		return '\n';
+	} else if (*pLexBuffer == ',') {
+		pLexBuffer++;
+		return ',';
 	}
 
-	yyerror("INTERNAL ERROR IN YYLEX");
-	return (0);
+	fatalerror("Internal error in yylex_MACROARGS");
+	return 0;
+}
+
+ULONG 
+yylex(void) // dispatches to the scanner that matches the current lexerstate
+{
+	switch (lexerstate) {
+	case LEX_STATE_NORMAL:
+		return yylex_NORMAL();
+	case LEX_STATE_MACROARGS:
+		return yylex_MACROARGS();
+	}
+	// Reached only when lexerstate holds neither of the two states handled above.
+	fatalerror("Internal error in yylex");
+	return 0;
+}
--- a/src/asm/yaccprt1.y
+++ b/src/asm/yaccprt1.y
@@ -21,14 +21,38 @@
 char	*tzNewMacro;
 ULONG	ulNewMacroSize;
 
-ULONG	symvaluetostring( char *dest, char *sym )
+size_t symvaluetostring(char *dest, size_t maxLength, char *sym)
 {
-	if( sym_isString(sym) )
-		strcpy( dest, sym_GetStringValue(sym) );
-	else
-		sprintf( dest, "$%lX", sym_GetConstantValue(sym) );
+	size_t length; // number of chars written to dest, not counting the terminating 0
 
-	return( strlen(dest) );
+	if (sym_isString(sym)) {
+		char *src = sym_GetStringValue(sym);
+		size_t i;
+		// Copy the string value, refusing to write more than maxLength chars.
+		for (i = 0; src[i] != 0; i++) {
+			if (i >= maxLength) {
+				fatalerror("Symbol value too long to fit buffer");
+			}
+			dest[i] = src[i];
+		}
+		dest[i] = 0; // terminate like the snprintf branch does; dest must hold maxLength + 1 bytes, as that branch already assumes
+		length = i;
+	} else {
+		ULONG value = sym_GetConstantValue(sym);
+		int fullLength = snprintf(dest, maxLength + 1, "$%lX", value);
+		// snprintf returns the untruncated length, so a value above maxLength means truncation.
+		if (fullLength < 0) {
+			fatalerror("snprintf encoding error");
+		} else {
+			length = (size_t)fullLength;
+
+			if (length > maxLength) {
+				fatalerror("Symbol value too long to fit buffer");
+			}
+		}
+	}
+
+	return length;
+}
 
 ULONG	str2int( char *s )
@@ -335,8 +359,8 @@
 
 %union
 {
-    char tzSym[MAXSYMLEN+1];
-    char tzString[256];
+    char tzSym[MAXSYMLEN + 1]; /* MAXSYMLEN chars plus one extra byte */
+    char tzString[MAXSTRLEN + 1]; /* MAXSTRLEN is 255, so the size is still 256 bytes */
     struct Expression sVal;
     SLONG nConstValue;
 }