shithub: rgbds

Download patch

ref: dc5b7802c873ce2549e7443d2250e9eed73e1367
parent: b1e6c7319792afa6aeda51fd83b891497dcccc4c
author: Rangi <[email protected]>
date: Mon Apr 19 08:00:42 EDT 2021

Make the `len` parameter optional in `STRSUB(str, pos, len)`

If the length is not specified, the substring continues to the end of the string.

--- a/src/asm/parser.y
+++ b/src/asm/parser.y
@@ -105,7 +105,7 @@
 	return len;
 }
 
-static void strsubUTF8(char *dest, size_t destLen, char const *src, int32_t pos, uint32_t len)
+static void strsubUTF8(char *dest, size_t destLen, char const *src, uint32_t pos, uint32_t len)
 {
 	size_t srcIndex = 0;
 	size_t destIndex = 0;
@@ -112,18 +112,11 @@
 	uint32_t state = 0;
 	uint32_t codep = 0;
 	uint32_t curLen = 0;
+	uint32_t curPos = 1;
 
-	if (pos < 1) {
-		pos += strlenUTF8(src);
-		if (pos < 1) {
-			warning(WARNING_BUILTIN_ARG, "STRSUB: Position starts at 1\n");
-			pos = 1;
-		}
-	}
-
 	/* Advance to starting position in source string. */
-	for (uint32_t curPos = 1; src[srcIndex] && curPos < pos; srcIndex++) {
-		switch (decode(&state, &codep, src[srcIndex])) {
+	while (src[srcIndex] && curPos < pos) {
+		switch (decode(&state, &codep, src[srcIndex++])) {
 		case 1:
 			fatalerror("STRSUB: Invalid UTF-8 character\n");
 			break;
@@ -133,9 +126,13 @@
 		}
 	}
 
-	if (!src[srcIndex] && len)
+	/*
+	 * A position 1 past the end of the string is allowed, but will trigger the
+	 * "Length too big" warning below if the length is nonzero.
+	 */
+	if (!src[srcIndex] && pos > curPos)
 		warning(WARNING_BUILTIN_ARG,
-			"STRSUB: Position %" PRId32 " is past the end of the string\n", pos);
+			"STRSUB: Position %" PRIu32 " is past the end of the string\n", pos);
 
 	/* Copy from source to destination. */
 	while (src[srcIndex] && destIndex < destLen - 1 && curLen < len) {
@@ -151,7 +148,7 @@
 	}
 
 	if (curLen < len)
-		warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %lu\n", (unsigned long)len);
+		warning(WARNING_BUILTIN_ARG, "STRSUB: Length too big: %" PRIu32 "\n", len);
 
 	/* Check for partial code point. */
 	if (state != 0)
@@ -170,18 +167,10 @@
 	return len;
 }
 
-static void charsubUTF8(char *dest, char const *src, int32_t pos)
+static void charsubUTF8(char *dest, char const *src, uint32_t pos)
 {
 	size_t charLen = 1;
 
-	if (pos < 1) {
-		pos += charlenUTF8(src);
-		if (pos < 1) {
-			warning(WARNING_BUILTIN_ARG, "CHARSUB: Position starts at 1\n");
-			pos = 1;
-		}
-	}
-
 	/* Advance to starting position in source string. */
 	for (uint32_t curPos = 1; charLen && curPos < pos; curPos++)
 		charLen = charmap_ConvertNext(&src, NULL);
@@ -190,7 +179,7 @@
 
 	if (!charmap_ConvertNext(&src, NULL))
 		warning(WARNING_BUILTIN_ARG,
-			"CHARSUB: Position %" PRId32 " is past the end of the string\n", pos);
+			"CHARSUB: Position %" PRIu32 " is past the end of the string\n", pos);
 
 	/* Copy from source to destination. */
 	memcpy(dest, start, src - start);
@@ -198,6 +187,22 @@
 	dest[src - start] = '\0';
 }
 
+static uint32_t adjustNegativePos(int32_t pos, size_t len, char const *functionName)
+{
+	/*
+	 * STRSUB and CHARSUB adjust negative `pos` arguments the same way: `len` is added,
+	 * such that position 0 is the last character; if still below 1, warn and use 1.
+	 */
+	if (pos < 1) {
+		pos += len;
+		if (pos < 1) {
+			warning(WARNING_BUILTIN_ARG, "%s: Position starts at 1\n", functionName);
+			pos = 1;
+		}
+	}
+	return (uint32_t)pos;
+}
+
 static void strrpl(char *dest, size_t destLen, char const *src, char const *old, char const *new)
 {
 	size_t oldLen = strlen(old);
@@ -1531,10 +1536,22 @@
 
 string		: T_STRING
 		| T_OP_STRSUB T_LPAREN string T_COMMA const T_COMMA uconst T_RPAREN {
-			strsubUTF8($$, sizeof($$), $3, $5, $7);
+			size_t len = strlenUTF8($3);
+			uint32_t pos = adjustNegativePos($5, len, "STRSUB");
+
+			strsubUTF8($$, sizeof($$), $3, pos, $7);
 		}
+		| T_OP_STRSUB T_LPAREN string T_COMMA const T_RPAREN {
+			size_t len = strlenUTF8($3);
+			uint32_t pos = adjustNegativePos($5, len, "STRSUB");
+
+			strsubUTF8($$, sizeof($$), $3, pos, pos > len ? 0 : len + 1 - pos);
+		}
 		| T_OP_CHARSUB T_LPAREN string T_COMMA const T_RPAREN {
-			charsubUTF8($$, $3, $5);
+			size_t len = charlenUTF8($3);
+			uint32_t pos = adjustNegativePos($5, len, "CHARSUB");
+
+			charsubUTF8($$, $3, pos);
 		}
 		| T_OP_STRCAT T_LPAREN T_RPAREN {
 			$$[0] = '\0';
--- a/src/asm/rgbasm.5
+++ b/src/asm/rgbasm.5
@@ -394,7 +394,7 @@
 .It Fn STRCMP str1 str2 Ta Returns -1 if Ar str1 No is alphabetically lower than Ar str2 No , zero if they match, 1 if Ar str1 No is greater than Ar str2 .
 .It Fn STRIN str1 str2 Ta Returns the first position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 .
 .It Fn STRRIN str1 str2 Ta Returns the last position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 .
-.It Fn STRSUB str pos len Ta Returns a substring from Ar str No starting at Ar pos No (first character is position 1) and Ar len No characters long. Zero or negative Ar pos No counts from the end, as if Qo STRLEN(str) Qc were added to it.
+.It Fn STRSUB str pos len Ta Returns a substring from Ar str No starting at Ar pos No (first character is position 1) and Ar len No characters long. Zero or negative Ar pos No counts from the end, as if Qo STRLEN(str) Qc were added to it. If Ar len No is not specified, the substring continues to the end of Ar str .
 .It Fn STRUPR str Ta Returns Ar str No with all letters in uppercase.
 .It Fn STRLWR str Ta Returns Ar str No with all letters in lowercase.
 .It Fn STRRPL str old new Ta Returns Ar str No with each non-overlapping occurrence of the substring Ar old No replaced with Ar new .
--- a/test/asm/strsub.asm
+++ b/test/asm/strsub.asm
@@ -1,7 +1,7 @@
 SECTION "sec", ROM0
 
 xstrsub: MACRO
-	PRINTLN STRSUB(\1, \2, \3)
+	PRINTLN STRSUB(\#)
 ENDM
 
 	xstrsub "ABC", 1, 1
@@ -10,6 +10,10 @@
 	xstrsub "ABC", -2, 1
 	xstrsub "ABC", -1, 1
 	xstrsub "ABC", 0, 1
+	xstrsub "ABC", 2
+	xstrsub "ABC", -1
+	xstrsub "ABC", 5
+	xstrsub "ABC", -5
 	xstrsub "ABC", 1, 2
 	xstrsub "ABC", 2, 2
 	xstrsub "ABC", 2, 32
@@ -16,6 +20,7 @@
 	xstrsub "ABC", 2, 300
 	xstrsub "ABC", -3, 300
 	xstrsub "ABC", 4, 0
+	xstrsub "ABC", 5, 0
 	xstrsub "ABC", 4, 1
 	xstrsub "カタカナ", 1, 2
 	xstrsub "カタカナ", 3, 2
--- a/test/asm/strsub.err
+++ b/test/asm/strsub.err
@@ -1,14 +1,18 @@
 warning: strsub.asm(15) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
-    STRSUB: Length too big: 32
+    STRSUB: Position 5 is past the end of the string
 warning: strsub.asm(16) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
+    STRSUB: Position starts at 1
+warning: strsub.asm(19) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
+    STRSUB: Length too big: 32
+warning: strsub.asm(20) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
     STRSUB: Length too big: 300
-warning: strsub.asm(17) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
+warning: strsub.asm(21) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
     STRSUB: Position starts at 1
-warning: strsub.asm(17) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
+warning: strsub.asm(21) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
     STRSUB: Length too big: 300
-warning: strsub.asm(19) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
-    STRSUB: Position 4 is past the end of the string
-warning: strsub.asm(19) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
+warning: strsub.asm(23) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
+    STRSUB: Position 5 is past the end of the string
+warning: strsub.asm(24) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
     STRSUB: Length too big: 1
-warning: strsub.asm(22) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
+warning: strsub.asm(27) -> strsub.asm::xstrsub(4): [-Wbuiltin-args]
     STRSUB: Length too big: 10
--- a/test/asm/strsub.out
+++ b/test/asm/strsub.out
@@ -4,11 +4,16 @@
 A
 B
 C
+BC
+BC
+
+ABC
 AB
 BC
 BC
 BC
 ABC
+
 
 
 カタ