ref: 71f88717024cd5434ff7ee541a112ece86082121
parent: 6dc4ce65999d9f26703c4728ed983fbec90635c1
author: ISSOtm <[email protected]>
date: Thu Jul 23 09:49:45 EDT 2020
Implement more functionality: macro arg detection, first emitted tokens, primitive (bad) column counting
--- a/include/asm/lexer.h
+++ b/include/asm/lexer.h
@@ -43,10 +43,11 @@
void lexer_ToggleStringExpansion(bool enable);
char const *lexer_GetFileName(void);
-unsigned int lexer_GetLineNo(void);
+uint32_t lexer_GetLineNo(void);
+uint32_t lexer_GetColNo(void);
void lexer_DumpStringExpansions(void);
int yylex(void);
void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken,
- char **capture, size_t *size, char const *name);
+ char const **capture, size_t *size, char const *name);
#endif /* RGBDS_ASM_LEXER_H */
--- a/src/asm/asmy.y
+++ b/src/asm/asmy.y
@@ -604,7 +604,7 @@
rept : T_POP_REPT uconst {
uint32_t nDefinitionLineNo = lexer_GetLineNo();
- char *body;
+ char const *body;
size_t size;
lexer_SkipToBlockEnd(T_POP_REPT, T_POP_ENDR, T_POP_ENDR,
&body, &size, "REPT block");
@@ -614,7 +614,7 @@
macrodef : T_LABEL ':' T_POP_MACRO {
int32_t nDefinitionLineNo = lexer_GetLineNo();
- char *body;
+ char const *body;
size_t size;
lexer_SkipToBlockEnd(T_POP_MACRO, T_POP_ENDM, T_POP_ENDM,
&body, &size, "macro definition");
--- a/src/asm/fstack.c
+++ b/src/asm/fstack.c
@@ -250,7 +250,8 @@
pLastFile = pLastFile->next;
}
- fprintf(stderr, "%s(%" PRId32 ")", lexer_GetFileName(), lexer_GetLineNo());
+ fprintf(stderr, "%s(%" PRId32 ",%" PRId32 ")",
+ lexer_GetFileName(), lexer_GetLineNo(), lexer_GetColNo());
}
void fstk_DumpToStr(char *buf, size_t buflen)
--- a/src/asm/globlex.c
+++ /dev/null
@@ -1,698 +1,0 @@
-/*
- * This file is part of RGBDS.
- *
- * Copyright (c) 1997-2018, Carsten Sorensen and RGBDS contributors.
- *
- * SPDX-License-Identifier: MIT
- */
-
-#include <math.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "asm/asm.h"
-#include "asm/lexer.h"
-#include "asm/macro.h"
-#include "asm/main.h"
-#include "asm/rpn.h"
-#include "asm/section.h"
-#include "asm/warning.h"
-
-#include "helpers.h"
-
-#include "asmy.h"
-
-bool oDontExpandStrings;
-int32_t nGBGfxID = -1;
-int32_t nBinaryID = -1;
-
-static int32_t gbgfx2bin(char ch)
-{
- int32_t i;
-
- for (i = 0; i <= 3; i++) {
- if (CurrentOptions.gbgfx[i] == ch)
- return i;
- }
-
- return 0;
-}
-
-static int32_t binary2bin(char ch)
-{
- int32_t i;
-
- for (i = 0; i <= 1; i++) {
- if (CurrentOptions.binary[i] == ch)
- return i;
- }
-
- return 0;
-}
-
-static int32_t char2bin(char ch)
-{
- if (ch >= 'a' && ch <= 'f')
- return (ch - 'a' + 10);
-
- if (ch >= 'A' && ch <= 'F')
- return (ch - 'A' + 10);
-
- if (ch >= '0' && ch <= '9')
- return (ch - '0');
-
- return 0;
-}
-
-typedef int32_t(*x2bin) (char ch);
-
-static int32_t ascii2bin(char *s)
-{
- char *start = s;
- uint32_t radix = 10;
- uint32_t result = 0;
- x2bin convertfunc = char2bin;
-
- switch (*s) {
- case '$':
- radix = 16;
- s++;
- convertfunc = char2bin;
- break;
- case '&':
- radix = 8;
- s++;
- convertfunc = char2bin;
- break;
- case '`':
- radix = 4;
- s++;
- convertfunc = gbgfx2bin;
- break;
- case '%':
- radix = 2;
- s++;
- convertfunc = binary2bin;
- break;
- default:
- /* Handle below */
- break;
- }
-
- const uint32_t max_q = UINT32_MAX / radix;
- const uint32_t max_r = UINT32_MAX % radix;
-
- if (*s == '\0') {
- /*
- * There are no digits after the radix prefix
- * (or the string is empty, which shouldn't happen).
- */
- error("Invalid integer constant\n");
- } else if (radix == 4) {
- int32_t size = 0;
- int32_t c;
-
- while (*s != '\0') {
- c = convertfunc(*s++);
- result = result * 2 + ((c & 2) << 7) + (c & 1);
- size++;
- }
-
- /*
- * Extending a graphics constant longer than 8 pixels,
- * the Game Boy tile width, produces a nonsensical result.
- */
- if (size > 8) {
- warning(WARNING_LARGE_CONSTANT, "Graphics constant '%s' is too long\n",
- start);
- }
- } else {
- bool overflow = false;
-
- while (*s != '\0') {
- int32_t digit = convertfunc(*s++);
-
- if (result > max_q
- || (result == max_q && digit > max_r)) {
- overflow = true;
- }
- result = result * radix + digit;
- }
-
- if (overflow)
- warning(WARNING_LARGE_CONSTANT, "Integer constant '%s' is too large\n",
- start);
- }
-
- return result;
-}
-
-uint32_t ParseFixedPoint(char *s, uint32_t size)
-{
- uint32_t i;
- uint32_t dot = 0;
-
- for (i = 0; i < size; i++) {
- if (s[i] == '.') {
- dot++;
-
- if (dot == 2)
- break;
- }
- }
-
- yyskipbytes(i);
-
- yylval.nConstValue = (int32_t)(atof(s) * 65536);
-
- return 1;
-}
-
-uint32_t ParseNumber(char *s, uint32_t size)
-{
- char dest[256];
-
- if (size > 255)
- fatalerror("Number token too long\n");
-
- strncpy(dest, s, size);
- dest[size] = 0;
- yylval.nConstValue = ascii2bin(dest);
-
- yyskipbytes(size);
-
- return 1;
-}
-
-/*
- * If the symbol name ends before the end of the macro arg,
- * return a pointer to the rest of the macro arg.
- * Otherwise, return NULL.
- */
-char const *AppendMacroArg(char whichArg, char *dest, size_t *destIndex)
-{
- char const *marg;
-
- if (whichArg == '@')
- marg = macro_GetUniqueIDStr();
- else if (whichArg >= '1' && whichArg <= '9')
- marg = macro_GetArg(whichArg - '0');
- else
- fatalerror("Invalid macro argument '\\%c' in symbol\n", whichArg);
-
- if (!marg)
- fatalerror("Macro argument '\\%c' not defined\n", whichArg);
-
- char ch;
-
- while ((ch = *marg) != 0) {
- if ((ch >= 'a' && ch <= 'z')
- || (ch >= 'A' && ch <= 'Z')
- || (ch >= '0' && ch <= '9')
- || ch == '_'
- || ch == '@'
- || ch == '#'
- || ch == '.') {
- if (*destIndex >= MAXSYMLEN)
- fatalerror("Symbol too long\n");
-
- dest[*destIndex] = ch;
- (*destIndex)++;
- } else {
- return marg;
- }
-
- marg++;
- }
-
- return NULL;
-}
-
-uint32_t ParseSymbol(char *src, uint32_t size)
-{
- char dest[MAXSYMLEN + 1];
- size_t srcIndex = 0;
- size_t destIndex = 0;
- char const *rest = NULL;
-
- while (srcIndex < size) {
- char ch = src[srcIndex++];
-
- if (ch == '\\') {
- /*
- * We don't check if srcIndex is still less than size,
- * but that can only fail to be true when the
- * following char is neither '@' nor a digit.
- * In that case, AppendMacroArg() will catch the error.
- */
- ch = src[srcIndex++];
-
- rest = AppendMacroArg(ch, dest, &destIndex);
- /* If the symbol's end was in the middle of the token */
- if (rest)
- break;
- } else {
- if (destIndex >= MAXSYMLEN)
- fatalerror("Symbol too long\n");
- dest[destIndex++] = ch;
- }
- }
-
- dest[destIndex] = 0;
-
- /* Tell the lexer we read all bytes that we did */
- yyskipbytes(srcIndex);
-
- /*
- * If an escape's expansion left some chars after the symbol's end,
- * such as the `::` in a `Backup\1` expanded to `BackupCamX::`,
- * put those into the buffer.
- * Note that this NEEDS to be done after the `yyskipbytes` above.
- */
- if (rest)
- yyunputstr(rest);
-
- /* If the symbol is an EQUS, expand it */
- if (!oDontExpandStrings) {
- struct Symbol const *sym = sym_FindSymbol(dest);
-
- if (sym && sym->type == SYM_EQUS) {
- char const *s;
-
- lex_BeginStringExpansion(dest);
-
- /* Feed the symbol's contents into the buffer */
- yyunputstr(s = sym_GetStringValue(sym));
-
- /* Lines inserted this way shall not increase lexer_GetLineNo() */
- while (*s) {
- if (*s++ == '\n')
- lexer_GetLineNo()--;
- }
- return 0;
- }
- }
-
- strcpy(yylval.tzSym, dest);
- return 1;
-}
-
-uint32_t PutMacroArg(char *src, uint32_t size)
-{
- char const *s;
-
- yyskipbytes(size);
- if ((size == 2 && src[1] >= '1' && src[1] <= '9')) {
- s = macro_GetArg(src[1] - '0');
-
- if (s != NULL)
- yyunputstr(s);
- else
- error("Macro argument '\\%c' not defined\n", src[1]);
- } else {
- error("Invalid macro argument '\\%c'\n", src[1]);
- }
- return 0;
-}
-
-uint32_t PutUniqueID(char *src, uint32_t size)
-{
- (void)src;
- char const *s;
-
- yyskipbytes(size);
-
- s = macro_GetUniqueIDStr();
-
- if (s != NULL)
- yyunputstr(s);
- else
- error("Macro unique label string not defined\n");
-
- return 0;
-}
-
-enum {
- T_LEX_MACROARG = 3000,
- T_LEX_MACROUNIQUE
-};
-
-const struct sLexInitString lexer_strings[] = {
- {"adc", T_Z80_ADC},
- {"add", T_Z80_ADD},
- {"and", T_Z80_AND},
- {"bit", T_Z80_BIT},
- {"call", T_Z80_CALL},
- {"ccf", T_Z80_CCF},
- {"cpl", T_Z80_CPL},
- {"cp", T_Z80_CP},
- {"daa", T_Z80_DAA},
- {"dec", T_Z80_DEC},
- {"di", T_Z80_DI},
- {"ei", T_Z80_EI},
- {"halt", T_Z80_HALT},
- {"inc", T_Z80_INC},
- {"jp", T_Z80_JP},
- {"jr", T_Z80_JR},
- {"ld", T_Z80_LD},
- {"ldi", T_Z80_LDI},
- {"ldd", T_Z80_LDD},
- {"ldio", T_Z80_LDIO},
- {"ldh", T_Z80_LDIO},
- {"nop", T_Z80_NOP},
- {"or", T_Z80_OR},
- {"pop", T_Z80_POP},
- {"push", T_Z80_PUSH},
- {"res", T_Z80_RES},
- {"reti", T_Z80_RETI},
- {"ret", T_Z80_RET},
- {"rlca", T_Z80_RLCA},
- {"rlc", T_Z80_RLC},
- {"rla", T_Z80_RLA},
- {"rl", T_Z80_RL},
- {"rrc", T_Z80_RRC},
- {"rrca", T_Z80_RRCA},
- {"rra", T_Z80_RRA},
- {"rr", T_Z80_RR},
- {"rst", T_Z80_RST},
- {"sbc", T_Z80_SBC},
- {"scf", T_Z80_SCF},
- {"set", T_POP_SET},
- {"sla", T_Z80_SLA},
- {"sra", T_Z80_SRA},
- {"srl", T_Z80_SRL},
- {"stop", T_Z80_STOP},
- {"sub", T_Z80_SUB},
- {"swap", T_Z80_SWAP},
- {"xor", T_Z80_XOR},
-
- {"nz", T_CC_NZ},
- {"z", T_CC_Z},
- {"nc", T_CC_NC},
- /* Handled in list of registers */
- /* { "c", T_TOKEN_C }, */
-
- {"hli", T_MODE_HL_INC},
- {"hld", T_MODE_HL_DEC},
- {"$ff00+c", T_MODE_HW_C},
- {"$ff00 + c", T_MODE_HW_C},
- {"af", T_MODE_AF},
- {"bc", T_MODE_BC},
- {"de", T_MODE_DE},
- {"hl", T_MODE_HL},
- {"sp", T_MODE_SP},
-
- {"a", T_TOKEN_A},
- {"b", T_TOKEN_B},
- {"c", T_TOKEN_C},
- {"d", T_TOKEN_D},
- {"e", T_TOKEN_E},
- {"h", T_TOKEN_H},
- {"l", T_TOKEN_L},
-
- {"||", T_OP_LOGICOR},
- {"&&", T_OP_LOGICAND},
- {"==", T_OP_LOGICEQU},
- {">", T_OP_LOGICGT},
- {"<", T_OP_LOGICLT},
- {">=", T_OP_LOGICGE},
- {"<=", T_OP_LOGICLE},
- {"!=", T_OP_LOGICNE},
- {"!", T_OP_LOGICNOT},
- {"|", T_OP_OR},
- {"^", T_OP_XOR},
- {"&", T_OP_AND},
- {"<<", T_OP_SHL},
- {">>", T_OP_SHR},
- {"+", T_OP_ADD},
- {"-", T_OP_SUB},
- {"*", T_OP_MUL},
- {"/", T_OP_DIV},
- {"%", T_OP_MOD},
- {"~", T_OP_NOT},
-
- {"def", T_OP_DEF},
-
- {"fragment", T_POP_FRAGMENT},
- {"bank", T_OP_BANK},
- {"align", T_OP_ALIGN},
-
- {"round", T_OP_ROUND},
- {"ceil", T_OP_CEIL},
- {"floor", T_OP_FLOOR},
- {"div", T_OP_FDIV},
- {"mul", T_OP_FMUL},
- {"sin", T_OP_SIN},
- {"cos", T_OP_COS},
- {"tan", T_OP_TAN},
- {"asin", T_OP_ASIN},
- {"acos", T_OP_ACOS},
- {"atan", T_OP_ATAN},
- {"atan2", T_OP_ATAN2},
-
- {"high", T_OP_HIGH},
- {"low", T_OP_LOW},
- {"isconst", T_OP_ISCONST},
-
- {"strcmp", T_OP_STRCMP},
- {"strin", T_OP_STRIN},
- {"strsub", T_OP_STRSUB},
- {"strlen", T_OP_STRLEN},
- {"strcat", T_OP_STRCAT},
- {"strupr", T_OP_STRUPR},
- {"strlwr", T_OP_STRLWR},
-
- {"include", T_POP_INCLUDE},
- {"printt", T_POP_PRINTT},
- {"printi", T_POP_PRINTI},
- {"printv", T_POP_PRINTV},
- {"printf", T_POP_PRINTF},
- {"export", T_POP_EXPORT},
- {"xdef", T_POP_XDEF},
- {"global", T_POP_GLOBAL},
- {"ds", T_POP_DS},
- {"db", T_POP_DB},
- {"dw", T_POP_DW},
- {"dl", T_POP_DL},
- {"section", T_POP_SECTION},
- {"purge", T_POP_PURGE},
-
- {"rsreset", T_POP_RSRESET},
- {"rsset", T_POP_RSSET},
-
- {"incbin", T_POP_INCBIN},
- {"charmap", T_POP_CHARMAP},
- {"newcharmap", T_POP_NEWCHARMAP},
- {"setcharmap", T_POP_SETCHARMAP},
- {"pushc", T_POP_PUSHC},
- {"popc", T_POP_POPC},
-
- {"fail", T_POP_FAIL},
- {"warn", T_POP_WARN},
- {"fatal", T_POP_FATAL},
- {"assert", T_POP_ASSERT},
- {"static_assert", T_POP_STATIC_ASSERT},
-
- {"macro", T_POP_MACRO},
- /* Not needed but we have it here just to protect the name */
- {"endm", T_POP_ENDM},
- {"shift", T_POP_SHIFT},
-
- {"rept", T_POP_REPT},
- /* Not needed but we have it here just to protect the name */
- {"endr", T_POP_ENDR},
-
- {"load", T_POP_LOAD},
- {"endl", T_POP_ENDL},
-
- {"if", T_POP_IF},
- {"else", T_POP_ELSE},
- {"elif", T_POP_ELIF},
- {"endc", T_POP_ENDC},
-
- {"union", T_POP_UNION},
- {"nextu", T_POP_NEXTU},
- {"endu", T_POP_ENDU},
-
- {"wram0", T_SECT_WRAM0},
- {"vram", T_SECT_VRAM},
- {"romx", T_SECT_ROMX},
- {"rom0", T_SECT_ROM0},
- {"hram", T_SECT_HRAM},
- {"wramx", T_SECT_WRAMX},
- {"sram", T_SECT_SRAM},
- {"oam", T_SECT_OAM},
-
- {"rb", T_POP_RB},
- {"rw", T_POP_RW},
- {"equ", T_POP_EQU},
- {"equs", T_POP_EQUS},
-
- /* Handled before in list of CPU instructions */
- /* {"set", T_POP_SET}, */
- {"=", T_POP_EQUAL},
-
- {"pushs", T_POP_PUSHS},
- {"pops", T_POP_POPS},
- {"pusho", T_POP_PUSHO},
- {"popo", T_POP_POPO},
-
- {"opt", T_POP_OPT},
-
- {NULL, 0}
-};
-
-const struct sLexFloat tNumberToken = {
- ParseNumber,
- T_NUMBER
-};
-
-const struct sLexFloat tFixedPointToken = {
- ParseFixedPoint,
- T_NUMBER
-};
-
-const struct sLexFloat tIDToken = {
- ParseSymbol,
- T_ID
-};
-
-const struct sLexFloat tMacroArgToken = {
- PutMacroArg,
- T_LEX_MACROARG
-};
-
-const struct sLexFloat tMacroUniqueToken = {
- PutUniqueID,
- T_LEX_MACROUNIQUE
-};
-
-void setup_lexer(void)
-{
- uint32_t id;
-
- lex_Init();
- lex_AddStrings(lexer_strings);
-
- //Macro arguments
-
- id = lex_FloatAlloc(&tMacroArgToken);
- lex_FloatAddFirstRange(id, '\\', '\\');
- lex_FloatAddSecondRange(id, '1', '9');
- id = lex_FloatAlloc(&tMacroUniqueToken);
- lex_FloatAddFirstRange(id, '\\', '\\');
- lex_FloatAddSecondRange(id, '@', '@');
-
- //Decimal constants
-
- id = lex_FloatAlloc(&tNumberToken);
- lex_FloatAddFirstRange(id, '0', '9');
- lex_FloatAddSecondRange(id, '0', '9');
- lex_FloatAddRange(id, '0', '9');
-
- //Binary constants
-
- id = lex_FloatAlloc(&tNumberToken);
- nBinaryID = id;
- lex_FloatAddFirstRange(id, '%', '%');
- lex_FloatAddSecondRange(id, CurrentOptions.binary[0],
- CurrentOptions.binary[0]);
- lex_FloatAddSecondRange(id, CurrentOptions.binary[1],
- CurrentOptions.binary[1]);
- lex_FloatAddRange(id, CurrentOptions.binary[0],
- CurrentOptions.binary[0]);
- lex_FloatAddRange(id, CurrentOptions.binary[1],
- CurrentOptions.binary[1]);
-
- //Octal constants
-
- id = lex_FloatAlloc(&tNumberToken);
- lex_FloatAddFirstRange(id, '&', '&');
- lex_FloatAddSecondRange(id, '0', '7');
- lex_FloatAddRange(id, '0', '7');
-
- //Gameboy gfx constants
-
- id = lex_FloatAlloc(&tNumberToken);
- nGBGfxID = id;
- lex_FloatAddFirstRange(id, '`', '`');
- lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[0],
- CurrentOptions.gbgfx[0]);
- lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[1],
- CurrentOptions.gbgfx[1]);
- lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[2],
- CurrentOptions.gbgfx[2]);
- lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[3],
- CurrentOptions.gbgfx[3]);
- lex_FloatAddRange(id, CurrentOptions.gbgfx[0], CurrentOptions.gbgfx[0]);
- lex_FloatAddRange(id, CurrentOptions.gbgfx[1], CurrentOptions.gbgfx[1]);
- lex_FloatAddRange(id, CurrentOptions.gbgfx[2], CurrentOptions.gbgfx[2]);
- lex_FloatAddRange(id, CurrentOptions.gbgfx[3], CurrentOptions.gbgfx[3]);
-
- //Hex constants
-
- id = lex_FloatAlloc(&tNumberToken);
- lex_FloatAddFirstRange(id, '$', '$');
- lex_FloatAddSecondRange(id, '0', '9');
- lex_FloatAddSecondRange(id, 'A', 'F');
- lex_FloatAddSecondRange(id, 'a', 'f');
- lex_FloatAddRange(id, '0', '9');
- lex_FloatAddRange(id, 'A', 'F');
- lex_FloatAddRange(id, 'a', 'f');
-
- //ID 's
-
- id = lex_FloatAlloc(&tIDToken);
- lex_FloatAddFirstRange(id, 'a', 'z');
- lex_FloatAddFirstRange(id, 'A', 'Z');
- lex_FloatAddFirstRange(id, '_', '_');
- lex_FloatAddSecondRange(id, '.', '.');
- lex_FloatAddSecondRange(id, 'a', 'z');
- lex_FloatAddSecondRange(id, 'A', 'Z');
- lex_FloatAddSecondRange(id, '0', '9');
- lex_FloatAddSecondRange(id, '_', '_');
- lex_FloatAddSecondRange(id, '\\', '\\');
- lex_FloatAddSecondRange(id, '@', '@');
- lex_FloatAddSecondRange(id, '#', '#');
- lex_FloatAddRange(id, '.', '.');
- lex_FloatAddRange(id, 'a', 'z');
- lex_FloatAddRange(id, 'A', 'Z');
- lex_FloatAddRange(id, '0', '9');
- lex_FloatAddRange(id, '_', '_');
- lex_FloatAddRange(id, '\\', '\\');
- lex_FloatAddRange(id, '@', '@');
- lex_FloatAddRange(id, '#', '#');
-
- //Local ID
-
- id = lex_FloatAlloc(&tIDToken);
- lex_FloatAddFirstRange(id, '.', '.');
- lex_FloatAddSecondRange(id, 'a', 'z');
- lex_FloatAddSecondRange(id, 'A', 'Z');
- lex_FloatAddSecondRange(id, '_', '_');
- lex_FloatAddRange(id, 'a', 'z');
- lex_FloatAddRange(id, 'A', 'Z');
- lex_FloatAddRange(id, '0', '9');
- lex_FloatAddRange(id, '_', '_');
- lex_FloatAddRange(id, '\\', '\\');
- lex_FloatAddRange(id, '@', '@');
- lex_FloatAddRange(id, '#', '#');
-
- // "@"
-
- id = lex_FloatAlloc(&tIDToken);
- lex_FloatAddFirstRange(id, '@', '@');
-
- //Fixed point constants
-
- id = lex_FloatAlloc(&tFixedPointToken);
- lex_FloatAddFirstRange(id, '.', '.');
- lex_FloatAddFirstRange(id, '0', '9');
- lex_FloatAddSecondRange(id, '.', '.');
- lex_FloatAddSecondRange(id, '0', '9');
- lex_FloatAddRange(id, '.', '.');
- lex_FloatAddRange(id, '0', '9');
-}
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -30,6 +30,13 @@
/* This caps the size of buffer reads, and according to POSIX, passing more than SSIZE_MAX is UB */
static_assert(LEXER_BUF_SIZE <= SSIZE_MAX);
+struct Expansion {
+ uint8_t distance; /* How far the expansion's beginning is from the current position */
+ char const *contents;
+ size_t len;
+ struct Expansion *parent;
+};
+
struct LexerState {
char const *path;
@@ -37,7 +44,7 @@
bool isMmapped;
union {
struct { /* If mmap()ed */
- char *ptr;
+ char *ptr; /* Technically `const` during the lexer's execution */
off_t size;
off_t offset;
};
@@ -44,7 +51,6 @@
struct { /* Otherwise */
int fd;
size_t index; /* Read index into the buffer */
- size_t nbChars; /* Number of chars in front of the buffer */
char buf[LEXER_BUF_SIZE]; /* Circular buffer */
};
};
@@ -52,12 +58,17 @@
/* Common state */
enum LexerMode mode;
bool atLineStart;
- unsigned int lineNo;
+ uint32_t lineNo;
+ uint32_t colNo;
+
bool capturing; /* Whether the text being lexed should be captured */
size_t captureSize; /* Amount of text captured */
char *captureBuf; /* Buffer to send the captured text to if non-NULL */
size_t captureCapacity; /* Size of the buffer above */
+
+ size_t nbChars; /* Number of chars of lookahead, for processing expansions */
bool expandStrings;
+ struct Expansion *expansion;
};
struct LexerState *lexerState = NULL;
@@ -116,14 +127,18 @@
/* Sometimes mmap() fails or isn't available, so have a fallback */
lseek(state->fd, 0, SEEK_SET);
state->index = 0;
- state->nbChars = 0;
}
state->mode = LEXER_NORMAL;
- state->atLineStart = true;
+ state->atLineStart = true; /* yylex() will init colNo due to this */
state->lineNo = 0;
+
state->capturing = false;
state->captureBuf = NULL;
+
+ state->nbChars = 0;
+ state->expandStrings = true;
+ state->expansion = NULL;
return state;
}
@@ -164,28 +179,50 @@
/* If at any point we need more than 255 characters of lookahead, something went VERY wrong. */
static int peek(uint8_t distance)
{
+ if (distance >= LEXER_BUF_SIZE)
+ fatalerror("Internal lexer error: buffer has insufficient size for peeking (%u >= %u)\n",
+ distance, LEXER_BUF_SIZE);
+
if (lexerState->isMmapped) {
if (lexerState->offset + distance >= lexerState->size)
return EOF;
+
+ if (!lexerState->capturing) {
+ bool escaped = false;
+
+ while (lexerState->nbChars < distance && !escaped) {
+ char c = lexerState->ptr[lexerState->offset
+ + lexerState->nbChars++];
+
+ if (escaped) {
+ escaped = false;
+ if ((c >= '1' && c <= '9') || c == '@')
+ fatalerror("Macro arg expansion is not implemented yet\n");
+ } else if (c == '\\') {
+ escaped = true;
+ }
+ }
+ }
+
return lexerState->ptr[lexerState->offset + distance];
}
if (lexerState->nbChars <= distance) {
/* Buffer isn't full enough, read some chars in */
+ size_t target = LEXER_BUF_SIZE - lexerState->nbChars; /* Aim: making the buf full */
/* Compute the index we'll start writing to */
size_t writeIndex = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE;
- size_t target = LEXER_BUF_SIZE - lexerState->nbChars; /* Aim: making the buf full */
- ssize_t nbCharsRead = 0;
+ ssize_t nbCharsRead = 0, totalCharsRead = 0;
#define readChars(size) do { \
nbCharsRead = read(lexerState->fd, &lexerState->buf[writeIndex], (size)); \
if (nbCharsRead == -1) \
fatalerror("Error while reading \"%s\": %s\n", lexerState->path, errno); \
+ totalCharsRead += nbCharsRead; \
writeIndex += nbCharsRead; \
if (writeIndex == LEXER_BUF_SIZE) \
writeIndex = 0; \
- lexerState->nbChars += nbCharsRead; /* Count all those chars in */ \
target -= nbCharsRead; \
} while (0)
@@ -201,6 +238,40 @@
#undef readChars
+ /* Do not perform expansions when capturing */
+ if (!lexerState->capturing) {
+ /* Scan the newly-inserted chars for any expansions */
+ bool escaped = false;
+ size_t index = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE;
+
+ for (ssize_t i = 0; i < totalCharsRead; i++) {
+ char c = lexerState->buf[index++];
+
+ if (escaped) {
+ escaped = false;
+ if ((c >= '1' && c <= '9') || c == '@')
+ fatalerror("Macro arg expansion is not implemented yet\n");
+ } else if (c == '\\') {
+ escaped = true;
+ }
+ if (index == LEXER_BUF_SIZE) /* Wrap around buffer */
+ index = 0;
+ }
+
+ /*
+ * If last char read was a backslash, pretend we didn't read it; this is
+ * important, otherwise we may miss an expansion that straddles refills
+ */
+ if (escaped) {
+ totalCharsRead--;
+ /* However, if that prevents having enough characters, error out */
+ if (lexerState->nbChars + totalCharsRead <= distance)
+ fatalerror("Internal lexer error: cannot read far enough due to backslash\n");
+ }
+ }
+
+ lexerState->nbChars += totalCharsRead;
+
/* If there aren't enough chars even after refilling, give up */
if (lexerState->nbChars <= distance)
return EOF;
@@ -231,6 +302,8 @@
if (lexerState->index >= LEXER_BUF_SIZE)
lexerState->index %= LEXER_BUF_SIZE;
}
+
+ lexerState->colNo += distance;
}
static int nextChar(void)
@@ -250,11 +323,16 @@
return lexerState->path;
}
-unsigned int lexer_GetLineNo(void)
+uint32_t lexer_GetLineNo(void)
{
return lexerState->lineNo;
}
+uint32_t lexer_GetColNo(void)
+{
+ return lexerState->colNo;
+}
+
void lexer_DumpStringExpansions(void)
{
/* TODO */
@@ -278,6 +356,20 @@
case '\t':
break;
+ /* Handle single-char tokens */
+ case '+':
+ return T_OP_ADD;
+ case '-':
+ return T_OP_SUB;
+
+ /* Handle accepted single chars */
+ case '[':
+ case ']':
+ case '(':
+ case ')':
+ case ',':
+ return c;
+
case EOF:
/* Captures end at their buffer's boundary no matter what */
if (!lexerState->capturing) {
@@ -288,6 +380,7 @@
default:
error("Unknown character '%c'\n");
}
+ lexerState->atLineStart = false;
}
}
@@ -298,8 +391,10 @@
int yylex(void)
{
- if (lexerState->atLineStart)
+ if (lexerState->atLineStart) {
lexerState->lineNo++;
+ lexerState->colNo = 0;
+ }
static int (* const lexerModeFuncs[])(void) = {
[LEXER_NORMAL] = yylex_NORMAL,
@@ -316,7 +411,7 @@
}
void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken,
- char **capture, size_t *size, char const *name)
+ char const **capture, size_t *size, char const *name)
{
lexerState->capturing = true;
lexerState->captureSize = 0;
--- a/src/asm/main.c
+++ b/src/asm/main.c
@@ -483,6 +483,13 @@
fprintf(dependfile, "%s: %s\n", tzTargetFileName, tzMainfile);
}
+ /* Init lexer; important to do first, since that's what provides the file name, line, etc */
+ struct LexerState *state = lexer_OpenFile(tzMainfile);
+
+ if (!state)
+ fatalerror("Failed to open main file!\n");
+ lexer_SetState(state);
+
nStartClock = clock();
nTotalLines = 0;
@@ -490,11 +497,6 @@
sym_Init();
sym_SetExportAll(exportall);
fstk_Init(tzMainfile);
- struct LexerState *state = lexer_OpenFile(tzMainfile);
-
- if (!state)
- fatalerror("Failed to open main file!");
- lexer_SetState(state);
opt_ParseDefines();
charmap_New("main", NULL);