ref: de32e245c973d23815c27e9fe3bb2c6a5d01140d
dir: /src/asm/charmap.c/
/* * UTF-8 decoder copyright © 2008–2009 Björn Höhrmann <[email protected]> * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include <stdint.h> static const uint8_t utf8d[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00..0f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10..1f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20..2f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 30..3f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40..4f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 50..5f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60..6f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 70..7f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 80..8f */ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /* 90..9f */ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* a0..af */ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* b0..bf */ 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* c0..cf */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* d0..df */ 0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, /* e0..e7 */ 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, /* e8..ef */ 0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, /* f0..f7 */ 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, /* f8..ff */ 0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, /* s0.. */ 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, /* ..s0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s1 */ 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, /* s1 */ 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, /* s3 */ 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, /* s4 */ 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, /* s5 */ 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s6 */ 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s7 */ 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s8 */ }; uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte) { uint32_t type = utf8d[byte]; *codep = (*state != 0) ? (byte & 0x3fu) | (*codep << 6) : (0xff >> type) & (byte); *state = utf8d[256 + *state * 16 + type]; return *state; } /* * Copyright © 2013 stag019 <[email protected]> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include <stdlib.h> #include <stdio.h> #include <string.h> #include "asm/asm.h" #include "asm/charmap.h" #include "asm/main.h" #include "asm/output.h" struct Charmap globalCharmap = {0}; int32_t readUTF8Char(char *dest, char *src) { uint32_t state; uint32_t codep; int32_t i; for (i = 0, state = 0;; i++) { if (decode(&state, &codep, (uint8_t)src[i]) == 1) fatalerror("invalid UTF-8 character"); dest[i] = src[i]; i++; if (state == 0) { dest[i] = '\0'; return i; } dest[i] = src[i]; } } int32_t charmap_Add(char *input, uint8_t output) { int32_t i; size_t input_length; char temp1i[CHARMAPLENGTH + 1], temp2i[CHARMAPLENGTH + 1]; char temp1o = 0, temp2o = 0; struct Charmap *charmap; if (pCurrentSection) { if (pCurrentSection->charmap) { charmap = pCurrentSection->charmap; } else { charmap = calloc(1, sizeof(struct Charmap)); if (charmap == NULL) fatalerror("Not enough memory for charmap"); pCurrentSection->charmap = charmap; } } else { charmap = &globalCharmap; } if (nPass == 2) return charmap->count; if (charmap->count > MAXCHARMAPS || strlen(input) > CHARMAPLENGTH) return -1; input_length = strlen(input); if (input_length > 1) { i = 0; while (i < charmap->count + 1) { if (input_length > strlen(charmap->input[i])) { memcpy(temp1i, charmap->input[i], CHARMAPLENGTH + 1); memcpy(charmap->input[i], input, input_length); temp1o = charmap->output[i]; charmap->output[i] = output; i++; break; } i++; } while (i < charmap->count + 1) { memcpy(temp2i, charmap->input[i], CHARMAPLENGTH + 1); memcpy(charmap->input[i], temp1i, CHARMAPLENGTH + 1); memcpy(temp1i, temp2i, CHARMAPLENGTH + 1); temp2o = charmap->output[i]; charmap->output[i] = temp1o; temp1o = temp2o; i++; } memcpy(charmap->input[charmap->count + 1], temp1i, CHARMAPLENGTH + 1); charmap->output[charmap->count + 1] = temp1o; } else { memcpy(charmap->input[charmap->count], input, input_length); charmap->output[charmap->count] = output; } return ++charmap->count; } int32_t charmap_Convert(char **input) { struct Charmap *charmap; char outchar[CHARMAPLENGTH + 1]; char *buffer; int32_t i, j, length; if (pCurrentSection && pCurrentSection->charmap) charmap = pCurrentSection->charmap; else charmap = &globalCharmap; buffer = malloc(strlen(*input)); if (buffer == NULL) fatalerror("Not enough memory for buffer"); length = 0; while (**input) { j = 0; for (i = 0; i < charmap->count; i++) { j = strlen(charmap->input[i]); if (memcmp(*input, charmap->input[i], j) == 0) { outchar[0] = charmap->output[i]; outchar[1] = 0; break; } j = 0; } if (!j) j = readUTF8Char(outchar, *input); if (!outchar[0]) { buffer[length++] = 0; } else { for (i = 0; outchar[i]; i++) buffer[length++] = outchar[i]; } *input += j; } *input = buffer; return length; }