ref: 2bf31870a7b9be374e24c20aaac13985b5e33f67
parent: 6198cc185c24ec4ba794ed27c969ae65d31f89c3
author: YamaArashi <[email protected]>
date: Fri Aug 22 17:44:18 EDT 2014
Cleaned up lexer - separated the lexer into multiple functions so it is more readable - fixed issue with long label names in macro arguments - added error checking code to prevent buffer overflows
--- a/include/asm/lexer.h
+++ b/include/asm/lexer.h
@@ -5,7 +5,8 @@
#include "asm/types.h"
-#define LEXHASHSIZE 512
+#define LEXHASHSIZE (1 << 11)
+#define MAXSTRLEN 255
struct sLexInitString {
char *tzName;
@@ -18,7 +19,9 @@
};
struct yy_buffer_state {
- char *pBufferStart;
+ char *pBufferRealStart; // actual starting address
+ char *pBufferStart; // address where the data is initially written
+ // after the "safety margin"
char *pBuffer;
ULONG nBufferSize;
ULONG oAtLineStart;
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -18,18 +18,18 @@
ULONG nNameLength;
struct sLexString *pNext;
};
-#define pLexBuffer (pCurrentBuffer->pBuffer)
-#define nLexBufferLeng (pCurrentBuffer->nBufferSize)
+#define pLexBufferRealStart (pCurrentBuffer->pBufferRealStart)
+#define pLexBuffer (pCurrentBuffer->pBuffer)
+#define AtLineStart (pCurrentBuffer->oAtLineStart)
#define SAFETYMARGIN 1024
-extern ULONG symvaluetostring(char *dest, char *s);
+extern size_t symvaluetostring(char *dest, size_t maxLength, char *sym);
struct sLexFloat tLexFloat[32];
struct sLexString *tLexHash[LEXHASHSIZE];
YY_BUFFER_STATE pCurrentBuffer;
-ULONG yyleng;
-ULONG nLexMaxLeng;
+ULONG nLexMaxLength; // max length of all keywords and operators
ULONG tFloatingSecondChar[256];
ULONG tFloatingFirstChar[256];
@@ -37,8 +37,6 @@
ULONG nFloating;
enum eLexerState lexerstate = LEX_STATE_NORMAL;
-#define AtLineStart pCurrentBuffer->oAtLineStart
-
#ifdef __GNUC__
void
strupr(char *s)
@@ -73,6 +71,9 @@
void
yyunput(char c)
{
+ if (pLexBuffer <= pLexBufferRealStart)
+ fatalerror("Buffer safety margin exceeded");
+
*(--pLexBuffer) = c;
}
@@ -79,12 +80,15 @@
+/* Pushes s back in front of the unread input; fatal error if it won't fit. */
void
yyunputstr(char *s)
{
- SLONG i;
+	size_t len = strlen(s);
- i = strlen(s) - 1;
+	// Compare sizes: pLexBuffer - len is not a valid pointer if s is too long.
- while (i >= 0)
- yyunput(s[i--]);
+	if (len > (size_t)(pLexBuffer - pLexBufferRealStart))
+		fatalerror("Buffer safety margin exceeded");
+	while (len > 0)
+		*(--pLexBuffer) = s[--len];
}
void
@@ -114,10 +118,10 @@
if ((pBuffer =
(YY_BUFFER_STATE) malloc(sizeof(struct yy_buffer_state))) !=
NULL) {
- if ((pBuffer->pBuffer = pBuffer->pBufferStart =
+ if ((pBuffer->pBufferRealStart =
(char *) malloc(size + 1 + SAFETYMARGIN)) != NULL) {
- pBuffer->pBuffer += SAFETYMARGIN;
- pBuffer->pBufferStart += SAFETYMARGIN;
+ pBuffer->pBufferStart = pBuffer->pBufferRealStart + SAFETYMARGIN;
+ pBuffer->pBuffer = pBuffer->pBufferRealStart + SAFETYMARGIN;
memcpy(pBuffer->pBuffer, mem, size);
pBuffer->nBufferSize = size;
pBuffer->oAtLineStart = 1;
@@ -135,8 +139,7 @@
YY_BUFFER_STATE pBuffer;
if ((pBuffer =
- (YY_BUFFER_STATE) malloc(sizeof(struct yy_buffer_state))) !=
- NULL) {
+ (YY_BUFFER_STATE) malloc(sizeof(struct yy_buffer_state))) != NULL) {
ULONG size;
fseek(f, 0, SEEK_END);
@@ -143,13 +146,13 @@
size = ftell(f);
fseek(f, 0, SEEK_SET);
- if ((pBuffer->pBuffer = pBuffer->pBufferStart =
+ if ((pBuffer->pBufferRealStart =
(char *) malloc(size + 2 + SAFETYMARGIN)) != NULL) {
char *mem;
ULONG instring = 0;
- pBuffer->pBuffer += SAFETYMARGIN;
- pBuffer->pBufferStart += SAFETYMARGIN;
+ pBuffer->pBufferStart = pBuffer->pBufferRealStart + SAFETYMARGIN;
+ pBuffer->pBuffer = pBuffer->pBufferRealStart + SAFETYMARGIN;
size = fread(pBuffer->pBuffer, sizeof(UBYTE), size, f);
@@ -167,7 +170,7 @@
mem += 1;
} else {
if ((mem[0] == 10 && mem[1] == 13)
- || (mem[0] == 13 && mem[1] == 10)) {
+ || (mem[0] == 13 && mem[1] == 10)) {
mem[0] = ' ';
mem[1] = '\n';
mem += 2;
@@ -174,17 +177,12 @@
} else if (mem[0] == 10 || mem[0] == 13) {
mem[0] = '\n';
mem += 1;
- } else if (mem[0] == '\n'
- && mem[1] == '*') {
+ } else if (mem[0] == '\n' && mem[1] == '*') {
mem += 1;
- while (!
- (*mem == '\n'
- || *mem == '\0'))
+ while (!(*mem == '\n' || *mem == '\0'))
*mem++ = ' ';
} else if (*mem == ';') {
- while (!
- (*mem == '\n'
- || *mem == '\0'))
+ while (!(*mem == '\n' || *mem == '\0'))
*mem++ = ' ';
} else
mem += 1;
@@ -199,17 +197,33 @@
return (NULL);
}
-ULONG
-lex_FloatAlloc(struct sLexFloat * tok)
+ULONG
+lex_FloatAlloc(struct sLexFloat *token)
{
- tLexFloat[nFloating] = (*tok);
+ tLexFloat[nFloating] = *token;
return (1 << (nFloating++));
}
+/*
+ * Make sure that only non-zero ASCII characters are used. Also, check if the
+ * start is greater than the end of the range.
+ */
+void
+lex_CheckCharacterRange(UWORD start, UWORD end)
+{
+ if (start > end || start < 1 || end > 127) {
+ fprintf(stderr, "Invalid character range (start: %u, end: %u)\n",
+ start, end);
+ exit(1);
+ }
+}
+
void
lex_FloatDeleteRange(ULONG id, UWORD start, UWORD end)
{
+ lex_CheckCharacterRange(start, end);
+
while (start <= end) {
tFloatingChars[start] &= ~id;
start += 1;
@@ -219,6 +233,8 @@
void
lex_FloatAddRange(ULONG id, UWORD start, UWORD end)
{
+ lex_CheckCharacterRange(start, end);
+
while (start <= end) {
tFloatingChars[start] |= id;
start += 1;
@@ -228,6 +244,8 @@
void
lex_FloatDeleteFirstRange(ULONG id, UWORD start, UWORD end)
{
+ lex_CheckCharacterRange(start, end);
+
while (start <= end) {
tFloatingFirstChar[start] &= ~id;
start += 1;
@@ -237,6 +255,8 @@
void
lex_FloatAddFirstRange(ULONG id, UWORD start, UWORD end)
{
+ lex_CheckCharacterRange(start, end);
+
while (start <= end) {
tFloatingFirstChar[start] |= id;
start += 1;
@@ -246,6 +266,8 @@
void
lex_FloatDeleteSecondRange(ULONG id, UWORD start, UWORD end)
{
+ lex_CheckCharacterRange(start, end);
+
while (start <= end) {
tFloatingSecondChar[start] &= ~id;
start += 1;
@@ -255,6 +277,8 @@
void
lex_FloatAddSecondRange(ULONG id, UWORD start, UWORD end)
{
+ lex_CheckCharacterRange(start, end);
+
while (start <= end) {
tFloatingSecondChar[start] |= id;
start += 1;
@@ -262,32 +286,32 @@
}
struct sLexFloat *
-lexgetfloat(ULONG id)
+lexgetfloat(ULONG nFloatMask)
{
- ULONG r = 0, mask = 1;
+ if (nFloatMask == 0) {
+ fatalerror("Internal error in lexgetfloat");
+ }
- if (id == 0)
- return (NULL);
+ int i = 0;
- while ((id & mask) == 0) {
- mask <<= 1;
- r += 1;
+ while ((nFloatMask & 1) == 0) {
+ nFloatMask >>= 1;
+ i++;
}
- return (&tLexFloat[r]);
+ return (&tLexFloat[i]);
}
ULONG
lexcalchash(char *s)
{
- ULONG r = 0;
+ ULONG hash = 0;
while (*s) {
- r = ((r << 1) + (toupper(*s))) % LEXHASHSIZE;
- s += 1;
+ hash = (hash * 283) ^ toupper(*s++);
}
- return (r);
+ return (hash % LEXHASHSIZE);
}
void
@@ -295,17 +319,17 @@
{
ULONG i;
- for (i = 0; i < LEXHASHSIZE; i += 1) {
+ for (i = 0; i < LEXHASHSIZE; i++) {
tLexHash[i] = NULL;
}
- for (i = 0; i < 256; i += 1) {
+ for (i = 0; i < 256; i++) {
tFloatingFirstChar[i] = 0;
tFloatingSecondChar[i] = 0;
tFloatingChars[i] = 0;
}
- nLexMaxLeng = 0;
+ nLexMaxLength = 0;
nFloating = 0;
}
@@ -333,8 +357,8 @@
strupr((*ppHash)->tzName);
- if ((*ppHash)->nNameLength > nLexMaxLeng)
- nLexMaxLeng = (*ppHash)->nNameLength;
+ if ((*ppHash)->nNameLength > nLexMaxLength)
+ nLexMaxLength = (*ppHash)->nNameLength;
} else
fatalerror("Out of memory!");
@@ -345,458 +369,391 @@
}
}
-ULONG
-yylex(void)
+/*
+ * Gets the "float" mask and "float" length; "float" is the token type of a
+ * token that is not a keyword. Each token type has its own character
+ * classes, which are matched with bit masks as the simple regular expression
+ * /[floatingFirstChar]([floatingSecondChar][floatingChars]*)?/.
+ * *pnFloatMask gets the bits of the token types with the longest match from
+ * the current position in the buffer, *pnFloatLen the length of that match.
+ */
+void
+yylex_GetFloatMaskAndFloatLen(ULONG *pnFloatMask, ULONG *pnFloatLen)
{
- ULONG hash, maxlen;
- char *s;
- struct sLexString *pLongestFixed = NULL;
- ULONG nFloatMask, nOldFloatMask, nFloatLen;
- ULONG linestart = AtLineStart;
+	// Note that '\0' should always have a bit mask of 0 in the "floating"
+	// tables, so it doesn't need to be checked for separately.
+	// The tables are indexed through unsigned char so that a byte >= 0x80
+	// cannot turn into a negative index where plain char is signed.
- switch (lexerstate) {
- case LEX_STATE_NORMAL:
- AtLineStart = 0;
+	char *s = pLexBuffer;
+	ULONG nOldFloatMask = 0;
+	ULONG nFloatMask = tFloatingFirstChar[(unsigned char)*s];
-scanagain:
+	if (nFloatMask != 0) {
+		s++;
+		nOldFloatMask = nFloatMask;
+		nFloatMask &= tFloatingSecondChar[(unsigned char)*s];
- while (*pLexBuffer == ' ' || *pLexBuffer == '\t') {
- linestart = 0;
- pLexBuffer += 1;
+		while (nFloatMask != 0) {
+			s++;
+			nOldFloatMask = nFloatMask;
+			nFloatMask &= tFloatingChars[(unsigned char)*s];
}
+	}
- if (*pLexBuffer == 0) {
- if (yywrap() == 0) {
- linestart = AtLineStart;
- AtLineStart = 0;
- goto scanagain;
+	*pnFloatMask = nOldFloatMask;
+	*pnFloatLen = (ULONG)(s - pLexBuffer);
+}
+
+/*
+ * Gets the longest keyword/operator from the current position in the buffer.
+ * Returns NULL if no keyword or operator matches there.
+ */
+struct sLexString *
+yylex_GetLongestFixed(void)
+{
+	struct sLexString *pLongestFixed = NULL;
+	char *s = pLexBuffer;
+	ULONG hash = 0;
+	ULONG length = 0;
+
+	while (length < nLexMaxLength && *s) {
+		// Same hash function as lexcalchash, extended one character per pass.
+		// toupper needs an unsigned char value; a plain char may be negative.
+		hash = (hash * 283) ^ toupper((unsigned char)*s);
+		s++;
+		length++;
+		struct sLexString *lex = tLexHash[hash % LEXHASHSIZE];
+		while (lex) {
+			if (lex->nNameLength == length
+			    && strncasecmp(pLexBuffer, lex->tzName, length) == 0) {
+				pLongestFixed = lex;
+				break;
}
+			lex = lex->pNext;
}
- s = pLexBuffer;
- nOldFloatMask = nFloatLen = 0;
- nFloatMask = tFloatingFirstChar[(int) *s++];
- while (nFloatMask && nFloatLen < nLexBufferLeng) {
- nFloatLen += 1;
- nOldFloatMask = nFloatMask;
- if (nFloatLen == 1)
- nFloatMask &= tFloatingSecondChar[(int) *s++];
- else
- nFloatMask &= tFloatingChars[(int) *s++];
+	}
+	return pLongestFixed;
+}
+
+size_t
+CopyMacroArg(char *dest, size_t maxLength, char c)
+{
+ int i;
+ char *s;
+ int argNum;
+
+ switch (c) {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ argNum = c - '0';
+ break;
+ case '@':
+ argNum = -1;
+ break;
+ default:
+ return 0;
+ }
+
+ if ((s = sym_FindMacroArg(argNum)) == NULL)
+ fatalerror("Macro argument not defined");
+
+ for (i = 0; s[i] != 0; i++) {
+ if (i >= maxLength) {
+ fatalerror("Macro argument too long to fit buffer");
}
+ dest[i] = s[i];
+ }
- maxlen = nLexBufferLeng;
- if (nLexMaxLeng < maxlen)
- maxlen = nLexMaxLeng;
+ return i;
+}
- yyleng = 0;
- hash = 0;
- s = pLexBuffer;
- while (yyleng < nLexMaxLeng) {
- /* XXX: Kludge warning! The dereference of s below
- * may go beyond the end of the buffer. We use the
- * following test to stop that from happening,
- * without really understanding what the rest of
- * the code is doing. This may not be the correct
- * fix! */
- if (!*s)
+static inline void
+yylex_StringWriteChar(char *s, size_t index, char c)
+{
+ if (index >= MAXSTRLEN) {
+ fatalerror("String too long");
+ }
+
+ s[index] = c;
+}
+
+static inline void
+yylex_SymbolWriteChar(char *s, size_t index, char c)
+{
+ if (index >= MAXSYMLEN) {
+ fatalerror("Symbol too long");
+ }
+
+ s[index] = c;
+}
+
+/*
+ * Trims white space at the end of a string.
+ * The index parameter is the index of the 0 at the end of the string.
+ */
+void yylex_TrimEnd(char *s, size_t index)
+{
+ int i;
+
+ for (i = (int)index - 1; i >= 0 && (s[i] == ' ' || s[i] == '\t'); i--)
+ s[i] = 0;
+}
+
+size_t
+yylex_ReadBracketedSymbol(char *dest, size_t index)
+{
+ char sym[MAXSYMLEN + 1];
+ char ch;
+ size_t i = 0;
+ size_t length, maxLength;
+
+ for (ch = *pLexBuffer;
+ ch != '}' && ch != '"' && ch != '\n';
+ ch = *(++pLexBuffer)) {
+ if (ch == '\\') {
+ ch = *(++pLexBuffer);
+ maxLength = MAXSYMLEN - i;
+ length = CopyMacroArg(&sym[i], maxLength, ch);
+
+ if (length != 0)
+ i += length;
+ else
+ fatalerror("Illegal character escape '%c'", ch);
+ } else
+ yylex_SymbolWriteChar(sym, i++, ch);
+ }
+
+ yylex_SymbolWriteChar(sym, i, 0);
+
+ maxLength = MAXSTRLEN - index; // it's assumed we're writing to a T_STRING
+ length = symvaluetostring(&dest[index], maxLength, sym);
+
+ if (*pLexBuffer == '}')
+ pLexBuffer++;
+ else
+ yyerror("Missing }");
+
+ return length;
+}
+
+void
+yylex_ReadQuotedString()
+{
+ size_t index = 0;
+ size_t length, maxLength;
+
+ while (*pLexBuffer != '"' && *pLexBuffer != '\n') {
+ char ch = *pLexBuffer++;
+
+ if (ch == '\\') {
+ ch = *pLexBuffer++;
+
+ switch (ch) {
+ case 'n':
+ ch = '\n';
break;
+ case 't':
+ ch = '\t';
+ break;
+ case '\\':
+ ch = '\\';
+ break;
+ case '"':
+ ch = '"';
+ break;
+ default:
+ maxLength = MAXSTRLEN - index;
+ length = CopyMacroArg(&yylval.tzString[index], maxLength, ch);
- yyleng += 1;
- hash = ((hash << 1) + (toupper(*s))) % LEXHASHSIZE;
- s += 1;
- if (tLexHash[hash]) {
- struct sLexString *lex;
+ if (length != 0)
+ index += length;
+ else
+ fatalerror("Illegal character escape '%c'", ch);
- lex = tLexHash[hash];
- while (lex) {
- if (lex->nNameLength == yyleng) {
- if (strncasecmp
- (pLexBuffer, lex->tzName,
- yyleng) == 0) {
- pLongestFixed = lex;
- }
- }
- lex = lex->pNext;
- }
+ ch = 0;
+ break;
}
+ } else if (ch == '{') {
+ // Get bracketed symbol within string.
+ index += yylex_ReadBracketedSymbol(yylval.tzString, index);
+ ch = 0;
}
- if (nFloatLen == 0 && pLongestFixed == NULL) {
- if (*pLexBuffer == '"') {
- ULONG index = 0;
+ if (ch)
+ yylex_StringWriteChar(yylval.tzString, index++, ch);
+ }
- pLexBuffer += 1;
- while ((*pLexBuffer != '"')
- && (*pLexBuffer != '\n')) {
- char ch, *marg;
+ yylex_StringWriteChar(yylval.tzString, index, 0);
- if ((ch = *pLexBuffer++) == '\\') {
- switch (ch = (*pLexBuffer++)) {
- case 'n':
- ch = '\n';
- break;
- case 't':
- ch = '\t';
- break;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if ((marg =
- sym_FindMacroArg(ch
- -
- '0'))
- != NULL) {
- while (*marg)
- yylval.
- tzString
- [index++]
- =
- *marg++;
- ch = 0;
- }
- break;
- case '@':
- if ((marg =
- sym_FindMacroArg
- (-1)) != NULL) {
- while (*marg)
- yylval.
- tzString
- [index++]
- =
- *marg++;
- ch = 0;
- }
- break;
- }
- } else if (ch == '{') {
- char sym[MAXSYMLEN];
- int i = 0;
+ if (*pLexBuffer == '"')
+ pLexBuffer++;
+ else
+ yyerror("Unterminated string");
+}
- while ((*pLexBuffer != '}')
- && (*pLexBuffer != '"')
- && (*pLexBuffer !=
- '\n')) {
- if ((ch =
- *pLexBuffer++) ==
- '\\') {
- switch (ch =
- (*pLexBuffer++)) {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if ((marg = sym_FindMacroArg(ch - '0')) != NULL) {
- while
- (*marg)
- sym[i++] = *marg++;
- ch = 0;
- }
- break;
- case '@':
- if ((marg = sym_FindMacroArg(-1)) != NULL) {
- while
- (*marg)
- sym[i++] = *marg++;
- ch = 0;
- }
- break;
- }
- } else
- sym[i++] = ch;
- }
+ULONG
+yylex_NORMAL()
+{
+ struct sLexString *pLongestFixed = NULL;
+ ULONG nFloatMask, nFloatLen;
+ ULONG linestart = AtLineStart;
- sym[i] = 0;
- index +=
- symvaluetostring(&yylval.
- tzString
- [index],
- sym);
- if (*pLexBuffer == '}')
- pLexBuffer += 1;
- else
- yyerror("Missing }");
- ch = 0;
- }
- if (ch)
- yylval.tzString[index++] = ch;
- }
+ AtLineStart = 0;
- yylval.tzString[index++] = 0;
+scanagain:
+ while (*pLexBuffer == ' ' || *pLexBuffer == '\t') {
+ linestart = 0;
+ pLexBuffer++;
+ }
- if (*pLexBuffer == '\n')
- yyerror("Unterminated string");
- else
- pLexBuffer += 1;
+ if (*pLexBuffer == 0) {
+ // Reached the end of a file, macro, or rept.
+ if (yywrap() == 0) {
+ linestart = AtLineStart;
+ AtLineStart = 0;
+ goto scanagain;
+ }
+ }
- return (T_STRING);
- } else if (*pLexBuffer == '{') {
- char sym[MAXSYMLEN], ch, *marg;
- int i = 0;
+ // Try to match an identifier, macro argument (e.g. \1),
+ // or numeric literal.
+ yylex_GetFloatMaskAndFloatLen(&nFloatMask, &nFloatLen);
- pLexBuffer += 1;
+ // Try to match a keyword or operator.
+ pLongestFixed = yylex_GetLongestFixed();
- while ((*pLexBuffer != '}')
- && (*pLexBuffer != '\n')) {
- if ((ch = *pLexBuffer++) == '\\') {
- switch (ch = (*pLexBuffer++)) {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if ((marg =
- sym_FindMacroArg(ch
- -
- '0'))
- != NULL) {
- while (*marg)
- sym[i++]
- =
- *marg++;
- ch = 0;
- }
- break;
- case '@':
- if ((marg =
- sym_FindMacroArg
- (-1)) != NULL) {
- while (*marg)
- sym[i++]
- =
- *marg++;
- ch = 0;
- }
- break;
- }
- } else
- sym[i++] = ch;
- }
- sym[i] = 0;
- symvaluetostring(yylval.tzString, sym);
- if (*pLexBuffer == '}')
- pLexBuffer += 1;
- else
- yyerror("Missing }");
+ if (nFloatLen == 0 && pLongestFixed == NULL) {
+ // No keyword, identifier, operator, or numerical literal matches.
- return (T_STRING);
- } else {
- if (*pLexBuffer == '\n')
- AtLineStart = 1;
+ if (*pLexBuffer == '"') {
+ pLexBuffer++;
+ yylex_ReadQuotedString();
+ return T_STRING;
+ } else if (*pLexBuffer == '{') {
+ pLexBuffer++;
+ yylex_ReadBracketedSymbol(yylval.tzString, 0);
+ return T_STRING;
+ } else {
+ // It's not a keyword, operator, identifier, macro argument,
+ // numeric literal, string, or bracketed symbol, so just return
+ // the ASCII character.
+ if (*pLexBuffer == '\n')
+ AtLineStart = 1;
- yyleng = 1;
- return (*pLexBuffer++);
- }
+ return *pLexBuffer++;
}
- if (nFloatLen == 0) {
- yyleng = pLongestFixed->nNameLength;
- pLexBuffer += yyleng;
- return (pLongestFixed->nToken);
- }
- if (pLongestFixed == NULL) {
- struct sLexFloat *tok;
+ }
- tok = lexgetfloat(nOldFloatMask);
- yyleng = nFloatLen;
- if (tok->Callback) {
- if (tok->Callback(pLexBuffer, yyleng) == 0)
- goto scanagain;
- }
- if (tok->nToken == T_ID && linestart) {
- pLexBuffer += yyleng;
- return (T_LABEL);
- } else {
- pLexBuffer += yyleng;
- return (tok->nToken);
- }
+ if (pLongestFixed == NULL || nFloatLen > pLongestFixed->nNameLength) {
+ // Longest match was an identifier, macro argument, or numeric literal.
+ struct sLexFloat *token = lexgetfloat(nFloatMask);
+
+ if (token->Callback) {
+ int done = token->Callback(pLexBuffer, nFloatLen);
+ if (!done)
+ goto scanagain;
}
- if (nFloatLen > pLongestFixed->nNameLength) {
- struct sLexFloat *tok;
- tok = lexgetfloat(nOldFloatMask);
- yyleng = nFloatLen;
- if (tok->Callback) {
- if (tok->Callback(pLexBuffer, yyleng) == 0)
- goto scanagain;
- }
- if (tok->nToken == T_ID && linestart) {
- pLexBuffer += yyleng;
- return (T_LABEL);
- } else {
- pLexBuffer += yyleng;
- return (tok->nToken);
- }
+ pLexBuffer += nFloatLen;
+
+ if (token->nToken == T_ID && linestart) {
+ return T_LABEL;
} else {
- yyleng = pLongestFixed->nNameLength;
- pLexBuffer += yyleng;
- return (pLongestFixed->nToken);
+ return token->nToken;
}
- break;
+ }
- case LEX_STATE_MACROARGS:
- {
- ULONG index = 0;
+ // Longest match was a keyword or operator.
+ pLexBuffer += pLongestFixed->nNameLength;
+ return pLongestFixed->nToken;
+}
- while (*pLexBuffer == ' ' || *pLexBuffer == '\t') {
- linestart = 0;
- pLexBuffer += 1;
- }
+ULONG
+yylex_MACROARGS()
+{
+ size_t index = 0;
+ size_t length, maxLength;
- while ((*pLexBuffer != ',')
- && (*pLexBuffer != '\n')) {
- char ch, *marg;
+ while (*pLexBuffer == ' ' || *pLexBuffer == '\t') {
+ pLexBuffer++;
+ }
- if ((ch = *pLexBuffer++) == '\\') {
- switch (ch = (*pLexBuffer++)) {
- case 'n':
- ch = '\n';
- break;
- case 't':
- ch = '\t';
- break;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if ((marg =
- sym_FindMacroArg(ch -
- '0')) !=
- NULL) {
- while (*marg)
- yylval.
- tzString
- [index++] =
- *marg++;
- ch = 0;
- }
- break;
- case '@':
- if ((marg =
- sym_FindMacroArg(-1)) !=
- NULL) {
- while (*marg)
- yylval.
- tzString
- [index++] =
- *marg++;
- ch = 0;
- }
- break;
- }
- } else if (ch == '{') {
- char sym[MAXSYMLEN];
- int i = 0;
+ while (*pLexBuffer != ',' && (*pLexBuffer != '\n')) {
+ char ch = *pLexBuffer++;
- while ((*pLexBuffer != '}')
- && (*pLexBuffer != '"')
- && (*pLexBuffer != '\n')) {
- if ((ch =
- *pLexBuffer++) == '\\') {
- switch (ch =
- (*pLexBuffer++)) {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if ((marg =
- sym_FindMacroArg
- (ch -
- '0')) !=
- NULL) {
- while
- (*marg)
- sym[i++] = *marg++;
- ch = 0;
- }
- break;
- case '@':
- if ((marg =
- sym_FindMacroArg
- (-1)) !=
- NULL) {
- while
- (*marg)
- sym[i++] = *marg++;
- ch = 0;
- }
- break;
- }
- } else
- sym[i++] = ch;
- }
- sym[i] = 0;
- index +=
- symvaluetostring(&yylval.
- tzString[index],
- sym);
- if (*pLexBuffer == '}')
- pLexBuffer += 1;
- else
- yyerror("Missing }");
- ch = 0;
- }
- if (ch)
- yylval.tzString[index++] = ch;
- }
+ if (ch == '\\') {
+ ch = *pLexBuffer++;
- if (index) {
- yyleng = index;
- yylval.tzString[index] = 0;
- if (*pLexBuffer == '\n') {
- while (yylval.tzString[--index] == ' ') {
- yylval.tzString[index] = 0;
- yyleng -= 1;
- }
- }
- return (T_STRING);
- } else if (*pLexBuffer == '\n') {
- pLexBuffer += 1;
- AtLineStart = 1;
- yyleng = 1;
- return ('\n');
- } else if (*pLexBuffer == ',') {
- pLexBuffer += 1;
- yyleng = 1;
- return (',');
- } else {
- yyerror("INTERNAL ERROR IN YYLEX");
- return (0);
+ switch (ch) {
+ case 'n':
+ ch = '\n';
+ break;
+ case 't':
+ ch = '\t';
+ break;
+ case '\\':
+ ch = '\\';
+ break;
+ default:
+ maxLength = MAXSTRLEN - index;
+ length = CopyMacroArg(&yylval.tzString[index], maxLength, ch);
+
+ if (length != 0)
+ index += length;
+ else
+ fatalerror("Illegal character escape '%c'", ch);
+
+ ch = 0;
+ break;
}
+ } else if (ch == '{') {
+ index += yylex_ReadBracketedSymbol(yylval.tzString, index);
+ ch = 0;
}
+ if (ch)
+ yylex_StringWriteChar(yylval.tzString, index++, ch);
+ }
- break;
+ if (index) {
+ yylex_StringWriteChar(yylval.tzString, index, 0);
+
+ // trim trailing white space at the end of the line
+ if (*pLexBuffer == '\n')
+ yylex_TrimEnd(yylval.tzString, index);
+
+ return T_STRING;
+ } else if (*pLexBuffer == '\n') {
+ pLexBuffer++;
+ AtLineStart = 1;
+ return '\n';
+ } else if (*pLexBuffer == ',') {
+ pLexBuffer++;
+ return ',';
}
- yyerror("INTERNAL ERROR IN YYLEX");
- return (0);
+ fatalerror("Internal error in yylex_MACROARGS");
+ return 0;
+}
+
+ULONG
+yylex(void)
+{
+ switch (lexerstate) {
+ case LEX_STATE_NORMAL:
+ return yylex_NORMAL();
+ case LEX_STATE_MACROARGS:
+ return yylex_MACROARGS();
+ }
+
+ fatalerror("Internal error in yylex");
+ return 0;
}
--- a/src/asm/yaccprt1.y
+++ b/src/asm/yaccprt1.y
@@ -21,14 +21,38 @@
char *tzNewMacro;
ULONG ulNewMacroSize;
-ULONG symvaluetostring( char *dest, char *sym )
+/*
+ * Writes the value of the symbol sym to dest as a NUL-terminated string: a
+ * string symbol's value as is, any other symbol's constant value as '$'
+ * followed by upper-case hexadecimal. Returns the length without the
+ * terminator. dest needs maxLength + 1 bytes; longer values are fatal.
+ */
+size_t symvaluetostring(char *dest, size_t maxLength, char *sym)
{
- if( sym_isString(sym) )
- strcpy( dest, sym_GetStringValue(sym) );
- else
- sprintf( dest, "$%lX", sym_GetConstantValue(sym) );
+	size_t length;
- return( strlen(dest) );
+	if (sym_isString(sym)) {
+		char *src = sym_GetStringValue(sym);
+
+		for (length = 0; src[length] != 0; length++) {
+			if (length >= maxLength)
+				fatalerror("Symbol value too long to fit buffer");
+			dest[length] = src[length];
+		}
+		// Terminate like the snprintf branch (and the old strcpy) does;
+		// not every caller appends its own terminator.
+		dest[length] = 0;
+	} else {
+		ULONG value = sym_GetConstantValue(sym);
+		int fullLength = snprintf(dest, maxLength + 1, "$%lX", value);
+		if (fullLength < 0)
+			fatalerror("snprintf encoding error");
+		length = (size_t)fullLength;
+		if (length > maxLength)
+			fatalerror("Symbol value too long to fit buffer");
+	}
+
+	return length;
}
ULONG str2int( char *s )
@@ -335,8 +359,8 @@
%union
{
- char tzSym[MAXSYMLEN+1];
- char tzString[256];
+ char tzSym[MAXSYMLEN + 1];
+ char tzString[MAXSTRLEN + 1];
struct Expression sVal;
SLONG nConstValue;
}