ref: defb221c9834a15da53fc9b1023f9c55e2637f12
parent: dfdb1071051b28608d178f3611332db1e299d105
parent: e7dc094e568bf05688fd729e9a266ee02d2a2607
author: Antonio Niño Díaz <[email protected]>
date: Sun Jul 7 07:46:08 EDT 2019
Merge pull request #360 from jidoc01/master Improve charmap structure with trie.
--- a/include/asm/charmap.h
+++ b/include/asm/charmap.h
@@ -13,14 +13,25 @@
#define MAXCHARMAPS 512
#define CHARMAPLENGTH 16
+#define MAXCHARNODES (MAXCHARMAPS * CHARMAPLENGTH + 1)
+/*
+ * A node for trie structure.
+ */
+struct Charnode {
+ uint8_t code; /* the value in a key-value pair. */
+ uint8_t isCode; /* has one if it's a code node, not just a bridge node. */
+ struct Charnode *next[256]; /* each index representing the next possible character from its current state. */
+};
+
struct Charmap {
- int32_t count;
- char input[MAXCHARMAPS][CHARMAPLENGTH + 1];
- char output[MAXCHARMAPS];
+ int32_t charCount; /* user-side count. */
+ int32_t nodeCount; /* node-side count. */
+ struct Charnode nodes[MAXCHARNODES]; /* first node is reserved for the root node in charmap. */
};
int32_t readUTF8Char(char *destination, char *source);
+
int32_t charmap_Add(char *input, uint8_t output);
int32_t charmap_Convert(char **input);
--- a/src/asm/charmap.c
+++ b/src/asm/charmap.c
@@ -42,11 +42,10 @@
int32_t charmap_Add(char *input, uint8_t output)
{
int32_t i;
- size_t input_length;
- char temp1i[CHARMAPLENGTH + 1], temp2i[CHARMAPLENGTH + 1];
- char temp1o = 0, temp2o = 0;
+ uint8_t v;
- struct Charmap *charmap;
+ struct Charmap *charmap;
+ struct Charnode *curr_node, *temp_node;
if (pCurrentSection) {
if (pCurrentSection->charmap) {
@@ -55,7 +54,6 @@
charmap = calloc(1, sizeof(struct Charmap));
if (charmap == NULL)
fatalerror("Not enough memory for charmap");
-
pCurrentSection->charmap = charmap;
}
} else {
@@ -62,84 +60,103 @@
charmap = &globalCharmap;
}
- if (charmap->count > MAXCHARMAPS || strlen(input) > CHARMAPLENGTH)
+ if (charmap->charCount >= MAXCHARMAPS || strlen(input) > CHARMAPLENGTH)
return -1;
- input_length = strlen(input);
- if (input_length > 1) {
- i = 0;
- while (i < charmap->count + 1) {
- if (input_length > strlen(charmap->input[i])) {
- memcpy(temp1i, charmap->input[i],
- CHARMAPLENGTH + 1);
- memcpy(charmap->input[i], input, input_length);
- temp1o = charmap->output[i];
- charmap->output[i] = output;
- i++;
- break;
- }
- i++;
+ curr_node = &charmap->nodes[0];
+
+ for (i = 0; (v = (uint8_t)input[i]); i++) {
+ if (curr_node->next[v]) {
+ curr_node = curr_node->next[v];
+ } else {
+ temp_node = &charmap->nodes[charmap->nodeCount + 1];
+
+ curr_node->next[v] = temp_node;
+ curr_node = temp_node;
+
+ ++charmap->nodeCount;
}
- while (i < charmap->count + 1) {
- memcpy(temp2i, charmap->input[i], CHARMAPLENGTH + 1);
- memcpy(charmap->input[i], temp1i, CHARMAPLENGTH + 1);
- memcpy(temp1i, temp2i, CHARMAPLENGTH + 1);
- temp2o = charmap->output[i];
- charmap->output[i] = temp1o;
- temp1o = temp2o;
- i++;
- }
- memcpy(charmap->input[charmap->count + 1], temp1i,
- CHARMAPLENGTH + 1);
- charmap->output[charmap->count + 1] = temp1o;
- } else {
- memcpy(charmap->input[charmap->count], input, input_length);
- charmap->output[charmap->count] = output;
}
- return ++charmap->count;
+
+ /* prevent duplicated keys by accepting only first key-value pair. */
+ if (curr_node->isCode)
+ return charmap->charCount;
+
+ curr_node->code = output;
+ curr_node->isCode = 1;
+
+ return ++charmap->charCount;
}
int32_t charmap_Convert(char **input)
{
- struct Charmap *charmap;
+ struct Charmap *charmap;
+ struct Charnode *charnode;
- char outchar[CHARMAPLENGTH + 1];
- char *buffer;
- int32_t i, j, length;
+ char *output;
+ char outchar[8];
+ int32_t i, match, length;
+ uint8_t v, foundCode;
+
if (pCurrentSection && pCurrentSection->charmap)
charmap = pCurrentSection->charmap;
else
charmap = &globalCharmap;
- buffer = malloc(strlen(*input));
- if (buffer == NULL)
+ output = malloc(strlen(*input));
+ if (output == NULL)
fatalerror("Not enough memory for buffer");
length = 0;
+
while (**input) {
- j = 0;
- for (i = 0; i < charmap->count; i++) {
- j = strlen(charmap->input[i]);
- if (memcmp(*input, charmap->input[i], j) == 0) {
- outchar[0] = charmap->output[i];
- outchar[1] = 0;
+ charnode = &charmap->nodes[0];
+
+ /*
+ * find the longest valid match which has been registered in charmap.
+ * note that there could be either multiple matches or no match.
+ * and it possibly takes the longest match between them,
+ * which means that it ignores partial matches shorter than the longest one.
+ */
+ for (i = match = 0; (v = (*input)[i]);) {
+ if (!charnode->next[v])
break;
+
+ charnode = charnode->next[v];
+ i++;
+
+ if (charnode->isCode) {
+ match = i;
+ foundCode = charnode->code;
}
- j = 0;
}
- if (!j)
- j = readUTF8Char(outchar, *input);
+ if (match) {
+ output[length] = foundCode;
- if (!outchar[0]) {
- buffer[length++] = 0;
+ length += 1;
} else {
- for (i = 0; outchar[i]; i++)
- buffer[length++] = outchar[i];
+ /*
+ * put a utf-8 character
+ * if failed to find a match.
+ */
+ match = readUTF8Char(outchar, *input);
+
+ if (match) {
+ memcpy(output + length, *input, match);
+ } else {
+ output[length] = 0;
+ match = 1;
+ }
+
+ length += match;
}
- *input += j;
+
+ *input += match;
}
- *input = buffer;
+
+ *input = output;
+
return length;
}