shithub: rgbds

Download patch

ref: 7ac8bd6e240618fbfacfe9f0987bf089b7721b9c
parent: be2572edca1eb97f5ca801d4e4380e757b04f314
author: Rangi <[email protected]>
date: Sun Apr 18 16:25:09 EDT 2021

Return a marker token at the end of any buffer

Removes the lexer hack mentioned in #778

--- a/include/asm/lexer.h
+++ b/include/asm/lexer.h
@@ -81,6 +81,7 @@
 	uint32_t lineNo;
 	char *body;
 	size_t size;
+	bool unterminated;
 };
 
 char const *lexer_GetFileName(void);
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -354,6 +354,7 @@
 	uint32_t colNo;
 	int lastToken;
 	int nextToken;
+	bool isAtEOF;
 
 	struct IfStack *ifStack;
 
@@ -378,6 +379,7 @@
 	state->atLineStart = true; /* yylex() will init colNo due to this */
 	state->lastToken = T_EOF;
 	state->nextToken = 0;
+	state->isAtEOF = false;
 
 	state->ifStack = NULL;
 
@@ -2278,11 +2280,13 @@
 
 int yylex(void)
 {
-restart:
-	if (lexerState->atLineStart && lexerStateEOL) {
+	if (lexerStateEOL) {
 		lexer_SetState(lexerStateEOL);
 		lexerStateEOL = NULL;
 	}
+	/* `lexer_SetState` updates `lexerState`, so check for EOF after it */
+	if (lexerState->isAtEOF)
+		return T_EOF;
 	if (lexerState->atLineStart) {
 		/* Newlines read within an expansion should not increase the line count */
 		if (!lexerState->expansions)
@@ -2299,23 +2303,19 @@
 	int token = lexerModeFuncs[lexerState->mode]();
 
 	if (token == T_EOF) {
-		if (lexerState->lastToken != T_NEWLINE) {
-			dbgPrint("Forcing EOL at EOF\n");
-			token = T_NEWLINE;
-		} else {
-			/* Try to switch to new buffer; if it succeeds, scan again */
-			dbgPrint("Reached EOF!\n");
-			/* Captures end at their buffer's boundary no matter what */
-			if (!lexerState->capturing) {
-				if (!yywrap())
-					goto restart;
+		/* Try to switch to new buffer; if it succeeds, scan again */
+		dbgPrint("Reached EOB!\n");
+		/* Captures end at their buffer's boundary no matter what */
+		if (!lexerState->capturing) {
+			if (yywrap()) {
 				dbgPrint("Reached end of input.\n");
-				return T_EOF;
+				lexerState->isAtEOF = true;
 			}
+			token = T_EOB;
 		}
 	}
 	lexerState->lastToken = token;
-	lexerState->atLineStart = token == T_NEWLINE;
+	lexerState->atLineStart = token == T_NEWLINE || token == T_EOB;
 
 	return token;
 }
@@ -2338,6 +2338,7 @@
 
 void lexer_CaptureRept(struct CaptureBody *capture)
 {
+	capture->unterminated = false;
 	capture->lineNo = lexer_GetLineNo();
 
 	char *captureStart = startCapture();
@@ -2372,7 +2373,6 @@
 					 * We know we have read exactly "ENDR", not e.g. an EQUS
 					 */
 					lexerState->captureSize -= strlen("ENDR");
-					lexerState->lastToken = T_POP_ENDR; // Force EOL at EOF
 					goto finish;
 				}
 				level--;
@@ -2383,6 +2383,7 @@
 		for (;;) {
 			if (c == EOF) {
 				error("Unterminated REPT/FOR block\n");
+				capture->unterminated = true;
 				goto finish;
 			} else if (c == '\n' || c == '\r') {
 				handleCRLF(c);
@@ -2404,6 +2405,7 @@
 
 void lexer_CaptureMacroBody(struct CaptureBody *capture)
 {
+	capture->unterminated = false;
 	capture->lineNo = lexer_GetLineNo();
 
 	char *captureStart = startCapture();
@@ -2434,7 +2436,6 @@
 				 * We know we have read exactly "ENDM", not e.g. an EQUS
 				 */
 				lexerState->captureSize -= strlen("ENDM");
-				lexerState->lastToken = T_POP_ENDM; // Force EOL at EOF
 				goto finish;
 			}
 		}
@@ -2443,6 +2444,7 @@
 		for (;;) {
 			if (c == EOF) {
 				error("Unterminated macro definition\n");
+				capture->unterminated = true;
 				goto finish;
 			} else if (c == '\n' || c == '\r') {
 				handleCRLF(c);
--- a/src/asm/parser.y
+++ b/src/asm/parser.y
@@ -646,6 +646,7 @@
 %type	<expr>		op_mem_ind
 %type	<assertType>	assert_type
 
+%token T_EOB "end of buffer"
 %token T_EOF 0 "end of file"
 %start asmfile
 
@@ -654,14 +655,13 @@
 asmfile		: lines
 ;
 
-/*
- * The lexer adds T_NEWLINE at the end of the file if one was not
- * already present, so we can rely on it to end a line.
- */
 lines		: %empty
 		| lines line
 ;
 
+endofline	: T_NEWLINE | T_EOB
+;
+
 plain_directive	: label
 		| label cpu_command
 		| label macro
@@ -669,9 +669,9 @@
 		| assignment_directive
 ;
 
-line		: plain_directive T_NEWLINE
+line		: plain_directive endofline
 		| line_directive /* Directives that manage newlines themselves */
-		| error T_NEWLINE { /* Continue parsing the next line on a syntax error */
+		| error endofline { /* Continue parsing the next line on a syntax error */
 			fstk_StopRept();
 		}
 ;
@@ -686,6 +686,7 @@
 		| rept
 		| for
 		| break
+		| include
 		| if
 		/* It's important that all of these require being at line start for `skipIfBlock` */
 		| elif
@@ -807,8 +808,7 @@
 		| equs
 ;
 
-directive	: include
-		| endc
+directive	: endc
 		| print
 		| println
 		| printf
@@ -988,8 +988,10 @@
 
 rept		: T_POP_REPT uconst T_NEWLINE {
 			lexer_CaptureRept(&captureBody);
-		} T_NEWLINE {
-			fstk_RunRept($2, captureBody.lineNo, captureBody.body, captureBody.size);
+		} endofline {
+			if (!captureBody.unterminated)
+				fstk_RunRept($2, captureBody.lineNo, captureBody.body,
+					     captureBody.size);
 		}
 ;
 
@@ -999,9 +1001,10 @@
 			lexer_ToggleStringExpansion(true);
 		} T_COMMA for_args T_NEWLINE {
 			lexer_CaptureRept(&captureBody);
-		} T_NEWLINE {
-			fstk_RunFor($3, $6.start, $6.stop, $6.step, captureBody.lineNo,
-				    captureBody.body, captureBody.size);
+		} endofline {
+			if (!captureBody.unterminated)
+				fstk_RunFor($3, $6.start, $6.stop, $6.step, captureBody.lineNo,
+					    captureBody.body, captureBody.size);
 		}
 
 for_args	: const {
@@ -1021,7 +1024,7 @@
 		}
 ;
 
-break		: T_POP_BREAK T_NEWLINE {
+break		: label T_POP_BREAK endofline {
 			if (fstk_Break())
 				lexer_SetMode(LEXER_SKIP_TO_ENDR);
 		}
@@ -1033,13 +1036,17 @@
 			lexer_ToggleStringExpansion(true);
 		} T_NEWLINE {
 			lexer_CaptureMacroBody(&captureBody);
-		} T_NEWLINE {
-			sym_AddMacro($3, captureBody.lineNo, captureBody.body, captureBody.size);
+		} endofline {
+			if (!captureBody.unterminated)
+				sym_AddMacro($3, captureBody.lineNo, captureBody.body,
+					     captureBody.size);
 		}
 		| T_LABEL T_COLON T_POP_MACRO T_NEWLINE {
 			lexer_CaptureMacroBody(&captureBody);
-		} T_NEWLINE {
-			sym_AddMacro($1, captureBody.lineNo, captureBody.body, captureBody.size);
+		} endofline {
+			if (!captureBody.unterminated)
+				sym_AddMacro($1, captureBody.lineNo, captureBody.body,
+					     captureBody.size);
 		}
 ;
 
@@ -1162,8 +1169,8 @@
 export_list_entry : scoped_id	{ sym_Export($1); }
 ;
 
-include		: T_POP_INCLUDE string {
-			fstk_RunInclude($2);
+include		: label T_POP_INCLUDE string endofline {
+			fstk_RunInclude($3);
 			if (failedOnMissingInclude)
 				YYACCEPT;
 		}
--- a/test/asm/block-comment-termination-error.err
+++ b/test/asm/block-comment-termination-error.err
@@ -1,5 +1,5 @@
 ERROR: block-comment-termination-error.asm(1):
     Unterminated block comment
 ERROR: block-comment-termination-error.asm(1):
-    syntax error, unexpected newline
+    syntax error, unexpected end of buffer
 error: Assembly aborted (2 errors)!
--- a/test/asm/code-after-endm-endr-endc.err
+++ b/test/asm/code-after-endm-endr-endc.err
@@ -1,15 +1,15 @@
 ERROR: code-after-endm-endr-endc.asm(6):
-    syntax error, unexpected PRINTLN, expecting newline
+    syntax error, unexpected PRINTLN, expecting newline or end of buffer
 ERROR: code-after-endm-endr-endc.asm(7):
     Macro "mac" not defined
 ERROR: code-after-endm-endr-endc.asm(12):
-    syntax error, unexpected PRINTLN, expecting newline
+    syntax error, unexpected PRINTLN, expecting newline or end of buffer
 ERROR: code-after-endm-endr-endc.asm(17):
     syntax error, unexpected PRINTLN, expecting newline
 ERROR: code-after-endm-endr-endc.asm(19):
-    syntax error, unexpected PRINTLN, expecting newline
+    syntax error, unexpected PRINTLN, expecting newline or end of buffer
 ERROR: code-after-endm-endr-endc.asm(23):
     syntax error, unexpected PRINTLN, expecting newline
 ERROR: code-after-endm-endr-endc.asm(25):
-    syntax error, unexpected PRINTLN, expecting newline
+    syntax error, unexpected PRINTLN, expecting newline or end of buffer
 error: Assembly aborted (7 errors)!
--- a/test/asm/nested-macrodef.err
+++ b/test/asm/nested-macrodef.err
@@ -3,5 +3,5 @@
 ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24):
     Unterminated macro definition
 ERROR: nested-macrodef.asm(27):
-    syntax error, unexpected identifier, expecting newline
+    Macro "inner" not defined
 error: Assembly aborted (2 errors)!