shithub: rgbds

Download patch

ref: d049ffc0f0a508f9d48d27e850692f54b6c062f8
parent: 8c0275480c680ce610c9f4ce7d121197152a879c
author: Rangi <[email protected]>
date: Tue Feb 16 14:44:25 EST 2021

Handle string literals within macro arguments (#685)

Fixes #683 and #691

The lexer's raw mode for reading macro args already attempted
to handle semicolons inside string literals, versus outside ones
which start comments. This change reuses the same function for
reading string literals in normal and raw modes, also handling:

- Commas in strings versus between macro args
- Character escapes
- {Interpolations} and \1-\9 args inside vs. outside strings
- Multi-line string literals

Macro args now allow the escapes '\,', '\"', and '\\'.

This establishes a consistent model for expanding macro args and
interpolations, whether within macro args, string literals, or
normal context:

- "{S}" should always equal the contents of S
- "\1" should always act like quoting the value of \1

--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -845,7 +845,6 @@
 			}
 		} else if (c == '{' && !lexerState->disableInterpolation) {
 			/* If character is an open brace, do symbol interpolation */
-			lexerState->macroArgScanDistance++;
 			shiftChars(1);
 			char const *ptr = readInterpolation();
 
@@ -1247,7 +1246,7 @@
 	yylval.nConstValue = bp1 << 8 | bp0;
 }
 
-/* Function to read identifiers & keywords */
+/* Functions to read identifiers & keywords */
 
 static bool startsIdentifier(int c)
 {
@@ -1373,51 +1372,39 @@
 	return NULL;
 }
 
-static int appendMacroArg(char const *str, int i)
+#define append_yylval_tzString(c) do { \
+	if (i < sizeof(yylval.tzString)) \
+		yylval.tzString[i++] = (c); \
+} while (0)
+
+static size_t appendEscapedSubstring(char const *str, size_t i)
 {
-	while (*str && i < sizeof(yylval.tzString)) {
+	/* Copy one extra to flag overflow */
+	while (*str) {
 		int c = *str++;
 
-		if (c != '\\') {
-			yylval.tzString[i++] = c;
-			continue;
-		}
-
-		c = *str++;
-
+		/* Escape characters that need escaping */
 		switch (c) {
-		case '\\': /* Return that character unchanged */
+		case '\\':
 		case '"':
 		case '{':
-		case '}':
+			append_yylval_tzString('\\');
 			break;
-		case 'n':
-			c = '\n';
+		case '\n':
+			append_yylval_tzString('\\');
+			c = 'n';
 			break;
-		case 'r':
-			c = '\r';
+		case '\r':
+			append_yylval_tzString('\\');
+			c = 'r';
 			break;
-		case 't':
-			c = '\t';
+		case '\t':
+			append_yylval_tzString('\\');
+			c = 't';
 			break;
-
-		case '\0': /* Can't really print that one */
-			error("Illegal character escape at end of macro arg\n");
-			yylval.tzString[i++] = '\\';
-			break;
-
-		/*
-		 * Line continuations and macro args were already
-		 * handled while reading the macro args, so '\@',
-		 * '\#', and '\0'-'\9' should not occur here.
-		 */
-
-		default:
-			error("Illegal character escape '%s'\n", print(c));
-			c = '\\';
-			break;
 		}
-		yylval.tzString[i++] = c;
+
+		append_yylval_tzString(c);
 	}
 
 	return i;
@@ -1479,10 +1466,11 @@
 		case '\\': // Character escape or macro arg
 			c = peek(0);
 			switch (c) {
-			case '\\': // Return that character unchanged
+			case '\\':
 			case '"':
 			case '{':
 			case '}':
+				// Return that character unchanged
 				shiftChars(1);
 				break;
 			case 'n':
@@ -1521,7 +1509,8 @@
 				shiftChars(1);
 				char const *str = readMacroArg(c);
 
-				i = appendMacroArg(str, i);
+				while (*str)
+					append_yylval_tzString(*str++);
 				continue; // Do not copy an additional character
 
 			case EOF: // Can't really print that one
@@ -1528,10 +1517,159 @@
 				error("Illegal character escape at end of input\n");
 				c = '\\';
 				break;
+
 			default:
 				error("Illegal character escape '%s'\n", print(c));
+				shiftChars(1);
+				break;
+			}
+			break;
+
+		case '{': // Symbol interpolation
+			// We'll be exiting the string scope, so re-enable expansions
+			// (Not interpolations, since they're handled by the function itself...)
+			lexerState->disableMacroArgs = false;
+			char const *ptr = readInterpolation();
+
+			if (ptr)
+				while (*ptr)
+					append_yylval_tzString(*ptr++);
+			lexerState->disableMacroArgs = true;
+			continue; // Do not copy an additional character
+
+		// Regular characters will just get copied
+		}
+
+		append_yylval_tzString(c);
+	}
+
+finish:
+	if (i == sizeof(yylval.tzString)) {
+		i--;
+		warning(WARNING_LONG_STR, "String constant too long\n");
+	}
+	yylval.tzString[i] = '\0';
+
+	dbgPrint("Read string \"%s\"\n", yylval.tzString);
+	lexerState->disableMacroArgs = false;
+	lexerState->disableInterpolation = false;
+}
+
+static size_t appendStringLiteral(size_t i)
+{
+	dbgPrint("Reading string\n");
+	lexerState->disableMacroArgs = true;
+	lexerState->disableInterpolation = true;
+
+	bool multiline = false;
+
+	// We reach this function after reading a single quote, but we also support triple quotes
+	append_yylval_tzString('"');
+	if (peek(0) == '"') {
+		append_yylval_tzString('"');
+		shiftChars(1);
+		if (peek(0) == '"') {
+			// """ begins a multi-line string
+			append_yylval_tzString('"');
+			shiftChars(1);
+			multiline = true;
+		} else {
+			// "" is an empty string, skip the loop
+			goto finish;
+		}
+	}
+
+	for (;;) {
+		int c = peek(0);
+
+		// '\r', '\n' or EOF ends a single-line string early
+		if (c == EOF || (!multiline && (c == '\r' || c == '\n'))) {
+			error("Unterminated string\n");
+			break;
+		}
+
+		// We'll be staying in the string, so we can safely consume the char
+		shiftChars(1);
+
+		// Handle '\r' or '\n' (in multiline strings only, already handled above otherwise)
+		if (c == '\r' || c == '\n') {
+			/* Handle CRLF before nextLine() since shiftChars updates colNo */
+			if (c == '\r' && peek(0) == '\n')
+				shiftChars(1);
+			nextLine();
+			c = '\n';
+		}
+
+		switch (c) {
+		case '"':
+			if (multiline) {
+				// Only """ ends a multi-line string
+				if (peek(0) != '"' || peek(1) != '"')
+					break;
+				append_yylval_tzString('"');
+				append_yylval_tzString('"');
+				shiftChars(2);
+			}
+			append_yylval_tzString('"');
+			goto finish;
+
+		case '\\': // Character escape or macro arg
+			c = peek(0);
+			switch (c) {
+			// Character escape
+			case '\\':
+			case '"':
+			case '{':
+			case '}':
+			case 'n':
+			case 'r':
+			case 't':
+				// Return that character unchanged
+				append_yylval_tzString('\\');
+				shiftChars(1);
+				break;
+
+			// Line continuation
+			case ' ':
+			case '\r':
+			case '\n':
+				readLineContinuation();
+				continue;
+
+			// Macro arg
+			case '@':
+			case '#':
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+				shiftChars(1);
+				char const *str = readMacroArg(c);
+
+				i = appendEscapedSubstring(str, i);
+				continue; // Do not copy an additional character
+
+			case EOF: // Can't really print that one
+				error("Illegal character escape at end of input\n");
 				c = '\\';
 				break;
+
+			case ',': /* `\,` inside a macro arg string literal */
+				warning(WARNING_OBSOLETE,
+					"`\\,` is deprecated inside strings\n");
+				shiftChars(1);
+				break;
+
+			default:
+				error("Illegal character escape '%s'\n", print(c));
+				shiftChars(1);
+				break;
 			}
 			break;
 
@@ -1542,8 +1680,7 @@
 			char const *ptr = readInterpolation();
 
 			if (ptr)
-				while (*ptr && i < sizeof(yylval.tzString))
-					yylval.tzString[i++] = *ptr++;
+				i = appendEscapedSubstring(ptr, i);
 			lexerState->disableMacroArgs = true;
 			continue; // Do not copy an additional character
 
@@ -1550,8 +1687,7 @@
 		// Regular characters will just get copied
 		}
 
-		if (i < sizeof(yylval.tzString)) // Copy one extra to flag overflow
-			yylval.tzString[i++] = c;
+		append_yylval_tzString(c);
 	}
 
 finish:
@@ -1564,6 +1700,8 @@
 	dbgPrint("Read string \"%s\"\n", yylval.tzString);
 	lexerState->disableMacroArgs = false;
 	lexerState->disableInterpolation = false;
+
+	return i;
 }
 
 /* Function to report one character's worth of garbage bytes */
@@ -1835,6 +1973,7 @@
 			case EOF:
 				error("Illegal character escape at end of input\n");
 				break;
+
 			default:
 				shiftChars(1);
 				error("Illegal character escape '%s'\n", print(c));
@@ -1886,9 +2025,8 @@
 	dbgPrint("Lexing in raw mode, line=%" PRIu32 ", col=%" PRIu32 "\n",
 		 lexer_GetLineNo(), lexer_GetColNo());
 
-	/* This is essentially a modified `readString` */
+	/* This is essentially a modified `appendStringLiteral` */
 	size_t i = 0;
-	bool insideString = false;
 
 	/* Trim left of string... */
 	while (isWhitespace(peek(0)))
@@ -1898,18 +2036,16 @@
 		int c = peek(0);
 
 		switch (c) {
-		case '"':
-			insideString = !insideString;
-			/* Other than that, just process quotes normally */
+		case '"': /* String literals inside macro args */
+			shiftChars(1);
+			i = appendStringLiteral(i);
 			break;
 
 		case ';': /* Comments inside macro args */
-			if (insideString)
-				break;
 			discardComment();
 			c = peek(0);
 			/* fallthrough */
-		case ',':
+		case ',': /* End of macro arg */
 		case '\r':
 		case '\n':
 		case EOF:
@@ -1939,16 +2075,30 @@
 			return T_STRING;
 
 		case '\\': /* Character escape */
-			c = peek(1);
+			shiftChars(1); /* Shift the backslash */
+			c = peek(0);
+
 			switch (c) {
-			case ',':
-				shiftChars(1);
+			case ',': /* Escape `\,` only inside a macro arg */
+			case '\\': /* Escapes shared with string literals */
+			case '"':
+			case '{':
+			case '}':
 				break;
 
+			case 'n':
+				c = '\n';
+				break;
+			case 'r':
+				c = '\r';
+				break;
+			case 't':
+				c = '\t';
+				break;
+
 			case ' ':
 			case '\r':
 			case '\n':
-				shiftChars(1); /* Shift the backslash */
 				readLineContinuation();
 				continue;
 
@@ -1956,19 +2106,27 @@
 				error("Illegal character escape at end of input\n");
 				c = '\\';
 				break;
-			default: /* Pass the rest as-is */
-				c = '\\';
+
+			/*
+			 * Macro args were already handled by peek, so '\@',
+			 * '\#', and '\0'-'\9' should not occur here.
+			 */
+
+			default:
+				error("Illegal character escape '%s'\n", print(c));
 				break;
 			}
-			break;
+			/* fallthrough */
 
-		/* Regular characters will just get copied */
+		default: /* Regular characters will just get copied */
+			append_yylval_tzString(c);
+			shiftChars(1);
+			break;
 		}
-		if (i < sizeof(yylval.tzString)) /* Copy one extra to flag overflow */
-			yylval.tzString[i++] = c;
-		shiftChars(1);
 	}
 }
+
+#undef append_yylval_tzString
 
 /*
  * This function uses the fact that `if`, etc. constructs are only valid when
--- a/src/asm/rgbasm.5
+++ b/src/asm/rgbasm.5
@@ -235,7 +235,6 @@
 .It Sy String Ta Sy Meaning
 .It Ql \[rs]\[rs] Ta Produces a backslash
 .It Ql \[rs]" Ta Produces a double quote without terminating
-.It Ql \[rs], Ta Comma
 .It Ql \[rs]{ Ta Curly bracket left
 .It Ql \[rs]} Ta Curly bracket right
 .It Ql \[rs]n Ta Newline ($0A)
@@ -1088,6 +1087,10 @@
 ENDM
 .Ed
 .El
+.Pp
+Macro arguments support all the escape sequences of strings, as well as
+.Ql \[rs],
+to escape commas, since those otherwise separate arguments.
 .Ss Exporting and importing symbols
 Importing and exporting of symbols is a feature that is very useful when your project spans many source files and, for example, you need to jump to a routine defined in another file.
 .Pp
@@ -1462,16 +1465,13 @@
 ENDM
 
     PrintMacro STRCAT("Hello "\[rs], \[rs]
-                      "world\[rs]\[rs]n")
+                      "world\[rs]n")
 .Ed
 .Pp
 The comma needs to be escaped to avoid it being treated as separating the macro's arguments.
-The backslash
-.Sq \[rs]
-.Pq from Sq \[rs]n
-also needs to be escaped because of the way
-.Nm
-processes macro arguments.
+The backslash in
+.Ql \[rs]n
+does not need to be escaped because string literals also work as usual inside macro arguments.
 .Pp
 In reality, up to 256 arguments can be passed to a macro, but you can only use the first 9 like this.
 If you want to use the rest, you need to use the
--- a/test/asm/macro-arg-in-string.asm
+++ b/test/asm/macro-arg-in-string.asm
@@ -1,9 +1,12 @@
 print1: MACRO
+	if _NARG == 2
+		assert !STRCMP("\1", \2)
+	endc
 	PRINTLN "\1"
 ENDM
 
 	print1 John "Danger" Smith
-	print1 \\A\nB
+	print1 \\\\A\\nB\n, "\\\\A\\nB\n"
 	print1 C\
 D
 	print1 E\!F ; illegal character escape
@@ -15,3 +18,10 @@
 
 s EQUS "hello"
 	iprint s
+
+symprint: MACRO
+	PRINTLN {\1}
+ENDM
+
+hello EQUS "\"goodbye\""
+	symprint s
--- a/test/asm/macro-arg-in-string.err
+++ b/test/asm/macro-arg-in-string.err
@@ -1,3 +1,3 @@
-ERROR: macro-arg-in-string.asm(9) -> macro-arg-in-string.asm::print1(2):
+ERROR: macro-arg-in-string.asm(12):
     Illegal character escape '!'
 error: Assembly aborted (1 errors)!
--- a/test/asm/macro-arg-in-string.out
+++ b/test/asm/macro-arg-in-string.out
@@ -1,6 +1,7 @@
 John "Danger" Smith
-\A
-B
+\\A\nB
+
 CD
-E\F
+E!F
 hello
+goodbye
--- a/test/asm/multi-line-strings.asm
+++ b/test/asm/multi-line-strings.asm
@@ -21,7 +21,8 @@
 ENDM
 
 	printarg "
-	printarg """
+	printarg """multi-line
+string argument"""
 
 EMPTY1 EQUS ""
 EMPTY2 EQUS "\ ; comment
--- a/test/asm/multi-line-strings.err
+++ b/test/asm/multi-line-strings.err
@@ -1,2 +1,5 @@
-warning: multi-line-strings.asm(34): [-Wuser]
+ERROR: multi-line-strings.asm(23):
+    Unterminated string
+warning: multi-line-strings.asm(35): [-Wuser]
     check the line number
+error: Assembly aborted (1 errors)!
--- a/test/asm/multi-line-strings.out
+++ b/test/asm/multi-line-strings.out
@@ -8,6 +8,8 @@
 !
 arg <">
 arg (")
-arg <""">
-arg (""")
+arg <"""multi-line
+string argument""">
+arg ("""multi-line
+string argument""")
 ()
--- /dev/null
+++ b/test/asm/quine.asm
@@ -1,0 +1,14 @@
+R:MACRO
+REPT _NARG
+PRINT STRSUB("\n\"\\ ENRST1ABCDFGHIMOPU_n#()+,:>",\1+1,1)
+SHIFT
+ENDR
+ENDM
+N:MACRO
+ R \#
+REPT _NARG
+PRINT"\1",STRSUB("\n,",(_NARG>1)+1,1)
+SHIFT
+ENDR
+ENDM
+ N 6,29,18,10,12,6,19,0,6,4,20,8,3,22,5,10,6,15,0,20,6,17,5,8,3,7,8,6,7,21,11,25,1,2,23,2,1,2,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,1,28,2,9,27,9,28,9,26,0,7,16,17,14,8,0,4,5,13,6,0,4,5,13,18,0,5,29,18,10,12,6,19,0,3,6,3,2,24,0,6,4,20,8,3,22,5,10,6,15,0,20,6,17,5,8,1,2,9,1,28,7,8,6,7,21,11,25,1,2,23,28,1,28,25,22,5,10,6,15,30,9,26,27,9,28,9,26,0,7,16,17,14,8,0,4,5,13,6,0,4,5,13,18,0,3,5,3
--- /dev/null
+++ b/test/asm/quine.out
@@ -1,0 +1,14 @@
+R:MACRO
+REPT _NARG
+PRINT STRSUB("\n\"\\ ENRST1ABCDFGHIMOPU_n#()+,:>",\1+1,1)
+SHIFT
+ENDR
+ENDM
+N:MACRO
+ R \#
+REPT _NARG
+PRINT"\1",STRSUB("\n,",(_NARG>1)+1,1)
+SHIFT
+ENDR
+ENDM
+ N 6,29,18,10,12,6,19,0,6,4,20,8,3,22,5,10,6,15,0,20,6,17,5,8,3,7,8,6,7,21,11,25,1,2,23,2,1,2,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,1,28,2,9,27,9,28,9,26,0,7,16,17,14,8,0,4,5,13,6,0,4,5,13,18,0,5,29,18,10,12,6,19,0,3,6,3,2,24,0,6,4,20,8,3,22,5,10,6,15,0,20,6,17,5,8,1,2,9,1,28,7,8,6,7,21,11,25,1,2,23,28,1,28,25,22,5,10,6,15,30,9,26,27,9,28,9,26,0,7,16,17,14,8,0,4,5,13,6,0,4,5,13,18,0,3,5,3
--- /dev/null
+++ b/test/asm/quine2.asm
@@ -1,0 +1,4 @@
+q: macro
+	println \1,"\1"
+endm
+	q "q: macro\n\tprintln \\1,\"\\1\"\nendm\n\tq "
--- /dev/null
+++ b/test/asm/quine2.out
@@ -1,0 +1,4 @@
+q: macro
+	println \1,"\1"
+endm
+	q "q: macro\n\tprintln \\1,\"\\1\"\nendm\n\tq "
--- /dev/null
+++ b/test/asm/raw-macro-args.asm
@@ -1,0 +1,38 @@
+printargs: MACRO
+	rept _NARG
+		println \1
+		shift
+	endr
+ENDM
+
+printlit: MACRO
+	rept _NARG
+		println "\1"
+		shift
+	endr
+ENDM
+
+NUM EQU 42
+STR EQUS "str\"ing"
+
+	printargs NUM
+	printargs "{d:NUM}"
+	printargs "{STR}", 16 ; comment 1
+	printargs "\"literal \\\"\\\\\\\"\""
+	printargs "literal \"\\\"", \ ; comment 2
+"""multi-"line"
+  ""string"" arg"""
+	printargs MUL(2.0\, 3.0)
+	printargs "unclosed
+
+	printlit NUM
+	printlit "{d:NUM}"
+	printlit "{STR}", 16 ; comment 3
+	printlit "\"literal \\\"\\\\\\\"\""
+	printlit "literal \"\\\"", \ ; comment 4
+"""multi-"line"
+  ""string"" arg"""
+	printlit MUL(2.0\, 3.0)
+	printlit this\n is\, \{not\} a\\n syntax\" error
+	printlit "unclosed
+	printlit """EOF
\ No newline at end of file
--- /dev/null
+++ b/test/asm/raw-macro-args.err
@@ -1,0 +1,9 @@
+ERROR: raw-macro-args.asm(26):
+    Unterminated string
+ERROR: raw-macro-args.asm(26) -> raw-macro-args.asm::printargs(2) -> raw-macro-args.asm::printargs::REPT~1(3):
+    Unterminated string
+ERROR: raw-macro-args.asm(37):
+    Unterminated string
+ERROR: raw-macro-args.asm(38):
+    Unterminated string
+error: Assembly aborted (4 errors)!
--- /dev/null
+++ b/test/asm/raw-macro-args.out
@@ -1,0 +1,23 @@
+$2A
+42
+str"ing
+$10
+"literal \"\\\""
+literal "\"
+multi-"line"
+  ""string"" arg
+$60000
+unclosed
+NUM
+"42"
+"str\"ing"
+16
+"\"literal \\\"\\\\\\\"\""
+"literal \"\\\""
+"""multi-"line"
+  ""string"" arg"""
+MUL(2.0, 3.0)
+this
+ is, {not} a\n syntax" error
+"unclosed
+"""EOF