ref: f5c2360f5f40876545752c7874cd1286baf34234
parent: e2baffe488c080950e5ffdd25989e6f249f28c5f
author: Ori Bernstein <[email protected]>
date: Sun Aug 5 21:03:35 EDT 2012
Don't get the wrong string length. We used to count escape sequences as multiple chars. Bad. Now we just count them as one.
--- a/parse/parse.h
+++ b/parse/parse.h
@@ -72,7 +72,10 @@
struct Tok {
int type;
int line;
+ char *strsz; /* NOTE(review): the name suggests a size, but the type is char * and no hunk in this patch assigns it -- confirm the intended type and who sets it */
char *str;
+
+ /* values parsed out */
vlong intval;
double fltval;
};
--- a/parse/tok.c
+++ b/parse/tok.c
@@ -201,30 +201,93 @@
return t;
}
+ /*
+  * Appends the byte c to the growable heap buffer *buf.
+  *
+  *   buf: the buffer; allocated here on first use (when *sz is 0).
+  *   len: number of bytes stored so far; incremented by one.
+  *   sz:  allocated capacity of *buf; starts at 16 and doubles when full.
+  *
+  * The buffer is grown while one slot is still free, so after every call
+  * the caller may store a terminator at (*buf)[*len] without a size check.
+  * Allocation failure is reported through die().
+  */
+ static void append(char **buf, size_t *len, size_t *sz, int c)
+ {
+     char *p;
+
+     if (!*sz) {
+         p = malloc(16);
+         if (!p)
+             die("out of memory allocating literal buffer");
+         *buf = p;
+         *sz = 16;
+     }
+     if (*len == *sz - 1) {
+         /* keep the old pointer until realloc is known to have succeeded */
+         p = realloc(*buf, *sz * 2);
+         if (!p)
+             die("out of memory growing literal buffer");
+         *buf = p;
+         *sz = *sz * 2;
+     }
+
+     (*buf)[(*len)++] = c;
+ }
+
+ /*
+  * Maps a single hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its
+  * numeric value. Any other character is reported through die(); the
+  * trailing return of -1 only matters if die() comes back.
+  */
+ static int hexval(char c)
+ {
+     if ('0' <= c && c <= '9')
+         return c - '0';
+     if ('a' <= c && c <= 'f')
+         return 10 + (c - 'a');
+     if ('A' <= c && c <= 'F')
+         return 10 + (c - 'A');
+
+     die("passed non-hex value to hexval()");
+     return -1;
+ }
+
+ /*
+  * Decodes one escape sequence from the input and appends the resulting
+  * single byte to the literal buffer (*buf, *len, *sz; see append()).
+  *
+  * The caller has already consumed the leading backslash. Recognised
+  * forms are \xHH (exactly two hex digits) and the single-character
+  * escapes n r t b " ' v \ 0. Anything else is rejected with fatal().
+  *
+  * Note that \0 stores a NUL byte inside the buffer, so the decoded
+  * text may contain embedded terminators.
+  */
+ static void decode(char **buf, size_t *len, size_t *sz)
+ {
+     char c, c1, c2;
+     int v;
+
+     c = next();
+     /* we've already seen the '\' */
+     switch (c) {
+     case 'x': /* arbitrary hex */
+         /* <ctype.h> needs an unsigned char value; plain char may be negative */
+         c1 = next();
+         if (!isxdigit((unsigned char)c1))
+             fatal(line, "expected hex digit, got %c", c1);
+         c2 = next();
+         if (!isxdigit((unsigned char)c2))
+             fatal(line, "expected hex digit, got %c", c2); /* report the digit that failed */
+         v = 16*hexval(c1) + hexval(c2);
+         append(buf, len, sz, v);
+         break;
+     case 'n': append(buf, len, sz, '\n'); break;
+     case 'r': append(buf, len, sz, '\r'); break;
+     case 't': append(buf, len, sz, '\t'); break;
+     case 'b': append(buf, len, sz, '\b'); break;
+     case '"': append(buf, len, sz, '\"'); break;
+     case '\'': append(buf, len, sz, '\''); break;
+     case 'v': append(buf, len, sz, '\v'); break;
+     case '\\': append(buf, len, sz, '\\'); break;
+     case '0': append(buf, len, sz, '\0'); break;
+     default: fatal(line, "unknown escape code \\%c", c);
+     }
+ }
+
+ /*
+  * Lexes a double-quoted string literal. The opening quote is consumed
+  * first; characters are then collected up to the closing quote, with
+  * each escape sequence decoded to a single byte by decode(). A '\0'
+  * from next() (end of input) or a raw newline inside the literal is
+  * rejected with fatal().
+  *
+  * Returns a Tstrlit token whose str points at the NUL-terminated heap
+  * buffer holding the decoded text.
+  */
  static Tok *strlit()
  {
      Tok *t;
-     int sstart; /* start of string within input buf */
      int c;
+     size_t len, sz;
+     char *buf;
-     assert(next() == '"');
-     sstart = fidx;
+     /* read outside assert() so the quote is consumed under NDEBUG too */
+     c = next();
+     assert(c == '"');
+     buf = NULL;
+     len = 0;
+     sz = 0;
      while (1) {
          c = next();
-         /* we don't unescape here, but on output */
+         /* escapes are decoded here, so each one counts as one char */
          if (c == '"')
              break;
-         else if (c == '\\')
-             c = next();
-
-         if (c == '\0')
+         else if (c == '\0')
              fatal(line, "Unexpected EOF within string");
          else if (c == '\n')
              fatal(line, "Newlines not allowed in strings");
+         else if (c == '\\')
+             decode(&buf, &len, &sz);
+         else
+             append(&buf, &len, &sz, c);
      };
+     /*
+      * Terminate through append(): it allocates on first use, so the
+      * empty literal "" gets a real buffer instead of a NULL store.
+      */
+     append(&buf, &len, &sz, '\0');
+     len--; /* the terminator is not part of the literal */
+
      t = mktok(Tstrlit);
-     t->str = strdupn(&fbuf[sstart], fidx - sstart - 1);
+     t->str = buf;
      return t;
  }
@@ -231,27 +294,34 @@
+ /*
+  * Lexes a single-quoted character literal. The opening quote is
+  * consumed first; characters are then collected up to the closing
+  * quote, with each escape sequence decoded to a single byte by
+  * decode(). A '\0' from next() (end of input) or a raw newline inside
+  * the literal is rejected with fatal(). No limit is placed here on how
+  * many characters appear between the quotes.
+  *
+  * Returns a Tchrlit token whose str points at the NUL-terminated heap
+  * buffer holding the decoded text.
+  */
  static Tok *charlit()
  {
      Tok *t;
-     int sstart; /* start of string within input buf */
      int c;
+     size_t len, sz;
+     char *buf;
-     assert(next() == '\'');
-     sstart = fidx;
+     /* read outside assert() so the quote is consumed under NDEBUG too */
+     c = next();
+     assert(c == '\'');
+     buf = NULL;
+     len = 0;
+     sz = 0;
      while (1) {
          c = next();
-         /* we don't unescape here, but on output */
+         /* escapes are decoded here, so each one counts as one char */
          if (c == '\'')
              break;
-         else if (c == '\\')
-             c = next();
-
-         if (c == '\0')
+         else if (c == '\0')
              fatal(line, "Unexpected EOF within char lit");
          else if (c == '\n')
              fatal(line, "Newlines not allowed in char lit");
+         else if (c == '\\')
+             decode(&buf, &len, &sz);
+         else
+             append(&buf, &len, &sz, c);
+
      };
+     /*
+      * Terminate through append(): it allocates on first use, so the
+      * empty literal '' gets a real buffer instead of a NULL store.
+      */
+     append(&buf, &len, &sz, '\0');
+     len--; /* the terminator is not part of the literal */
+
      t = mktok(Tchrlit);
-     t->str = strdupn(&fbuf[sstart], fidx - sstart - 1);
+     t->str = buf;
      return t;
  }