shithub: rgbds

Download patch

ref: defb221c9834a15da53fc9b1023f9c55e2637f12
parent: dfdb1071051b28608d178f3611332db1e299d105
parent: e7dc094e568bf05688fd729e9a266ee02d2a2607
author: Antonio Niño Díaz <[email protected]>
date: Sun Jul 7 07:46:08 EDT 2019

Merge pull request #360 from jidoc01/master

Improve charmap structure with trie.

--- a/include/asm/charmap.h
+++ b/include/asm/charmap.h
@@ -13,14 +13,25 @@
 
 #define MAXCHARMAPS	512
 #define CHARMAPLENGTH	16
+#define MAXCHARNODES	(MAXCHARMAPS * CHARMAPLENGTH + 1)
 
+/*
+ * A node for trie structure.
+ */
+struct Charnode {
+	uint8_t code; /* the value in a key-value pair. */
+	uint8_t isCode; /* has one if it's a code node, not just a bridge node. */
+	struct Charnode *next[256]; /* each index representing the next possible character from its current state. */
+};
+
 struct Charmap {
-	int32_t count;
-	char input[MAXCHARMAPS][CHARMAPLENGTH + 1];
-	char output[MAXCHARMAPS];
+	int32_t charCount; /* user-side count. */
+	int32_t nodeCount; /* node-side count. */
+	struct Charnode nodes[MAXCHARNODES]; /* first node is reserved for the root node in charmap. */
 };
 
 int32_t readUTF8Char(char *destination, char *source);
+
 int32_t charmap_Add(char *input, uint8_t output);
 int32_t charmap_Convert(char **input);
 
--- a/src/asm/charmap.c
+++ b/src/asm/charmap.c
@@ -42,11 +42,10 @@
 int32_t charmap_Add(char *input, uint8_t output)
 {
 	int32_t i;
-	size_t input_length;
-	char temp1i[CHARMAPLENGTH + 1], temp2i[CHARMAPLENGTH + 1];
-	char temp1o = 0, temp2o = 0;
+	uint8_t v;
 
-	struct Charmap *charmap;
+	struct Charmap 	*charmap;
+	struct Charnode	*curr_node, *temp_node;
 
 	if (pCurrentSection) {
 		if (pCurrentSection->charmap) {
@@ -55,7 +54,6 @@
 			charmap = calloc(1, sizeof(struct Charmap));
 			if (charmap == NULL)
 				fatalerror("Not enough memory for charmap");
-
 			pCurrentSection->charmap = charmap;
 		}
 	} else {
@@ -62,84 +60,103 @@
 		charmap = &globalCharmap;
 	}
 
-	if (charmap->count > MAXCHARMAPS || strlen(input) > CHARMAPLENGTH)
+	if (charmap->charCount >= MAXCHARMAPS || strlen(input) > CHARMAPLENGTH)
 		return -1;
 
-	input_length = strlen(input);
-	if (input_length > 1) {
-		i = 0;
-		while (i < charmap->count + 1) {
-			if (input_length > strlen(charmap->input[i])) {
-				memcpy(temp1i, charmap->input[i],
-				       CHARMAPLENGTH + 1);
-				memcpy(charmap->input[i], input, input_length);
-				temp1o = charmap->output[i];
-				charmap->output[i] = output;
-				i++;
-				break;
-			}
-			i++;
+	curr_node = &charmap->nodes[0];
+
+	for (i = 0; (v = (uint8_t)input[i]); i++) {
+		if (curr_node->next[v]) {
+			curr_node = curr_node->next[v];
+		} else {
+			temp_node = &charmap->nodes[charmap->nodeCount + 1];
+
+			curr_node->next[v] = temp_node;
+			curr_node = temp_node;
+
+			++charmap->nodeCount;
 		}
-		while (i < charmap->count + 1) {
-			memcpy(temp2i, charmap->input[i], CHARMAPLENGTH + 1);
-			memcpy(charmap->input[i], temp1i, CHARMAPLENGTH + 1);
-			memcpy(temp1i, temp2i, CHARMAPLENGTH + 1);
-			temp2o = charmap->output[i];
-			charmap->output[i] = temp1o;
-			temp1o = temp2o;
-			i++;
-		}
-		memcpy(charmap->input[charmap->count + 1], temp1i,
-		       CHARMAPLENGTH + 1);
-		charmap->output[charmap->count + 1] = temp1o;
-	} else {
-		memcpy(charmap->input[charmap->count], input, input_length);
-		charmap->output[charmap->count] = output;
 	}
-	return ++charmap->count;
+
+	/* prevent duplicated keys by accepting only first key-value pair.  */
+	if (curr_node->isCode)
+		return charmap->charCount;
+
+	curr_node->code = output;
+	curr_node->isCode = 1;
+
+	return ++charmap->charCount;
 }
 
 int32_t charmap_Convert(char **input)
 {
-	struct Charmap *charmap;
+	struct Charmap 	*charmap;
+	struct Charnode	*charnode;
 
-	char outchar[CHARMAPLENGTH + 1];
-	char *buffer;
-	int32_t i, j, length;
+	char *output;
+	char outchar[8];
 
+	int32_t i, match, length;
+	uint8_t v, foundCode;
+
 	if (pCurrentSection && pCurrentSection->charmap)
 		charmap = pCurrentSection->charmap;
 	else
 		charmap = &globalCharmap;
 
-	buffer = malloc(strlen(*input));
-	if (buffer == NULL)
+	output = malloc(strlen(*input));
+	if (output == NULL)
 		fatalerror("Not enough memory for buffer");
 
 	length = 0;
+
 	while (**input) {
-		j = 0;
-		for (i = 0; i < charmap->count; i++) {
-			j = strlen(charmap->input[i]);
-			if (memcmp(*input, charmap->input[i], j) == 0) {
-				outchar[0] = charmap->output[i];
-				outchar[1] = 0;
+		charnode = &charmap->nodes[0];
+
+		/*
+		 * find the longest valid match which has been registered in charmap.
+		 * note that there could be either multiple matches or no match.
+		 * and it possibly takes the longest match between them,
+		 * which means that it ignores partial matches shorter than the longest one.
+		*/
+		for (i = match = 0; (v = (*input)[i]);) {
+			if (!charnode->next[v])
 				break;
+
+			charnode = charnode->next[v];
+			i++;
+
+			if (charnode->isCode) {
+				match = i;
+				foundCode = charnode->code;
 			}
-			j = 0;
 		}
 
-		if (!j)
-			j = readUTF8Char(outchar, *input);
+		if (match) {
+			output[length] = foundCode;
 
-		if (!outchar[0]) {
-			buffer[length++] = 0;
+			length += 1;
 		} else {
-			for (i = 0; outchar[i]; i++)
-				buffer[length++] = outchar[i];
+			/*
+			 * put a utf-8 character
+			 * if failed to find a match.
+			 */
+			match = readUTF8Char(outchar, *input);
+
+			if (match) {
+				memcpy(output + length, *input, match);
+			} else {
+				output[length] = 0;
+				match = 1;
+			}
+
+			length += match;
 		}
-		*input += j;
+
+		*input += match;
 	}
-	*input = buffer;
+
+	*input = output;
+
 	return length;
 }