ref: f510f5f6eaa66e5f6a399f6bc4fa7091e00a6151
parent: 4dc8cff4fd04e8536f5577522ecb95032064f295
author: Sigrid Solveig Haflínudóttir <[email protected]>
date: Thu Dec 12 21:05:16 EST 2024
ios, read: better parse error location logic
--- a/ios.c
+++ b/ios.c
@@ -542,9 +542,9 @@
MEM_FREE(s->buf);
s->buf = nil;
s->size = s->maxsize = s->bpos = 0;
- if(s->filename != emptystr){
- MEM_FREE(s->filename);
- s->filename = emptystr;
+ if(s->loc.filename != emptystr){
+ MEM_FREE(s->loc.filename);
+ s->loc.filename = emptystr;
}
}
@@ -718,7 +718,7 @@
s->fpos = -1;
s->fd = -1;
s->ownbuf = 1;
- s->lineno = 1;
+ s->loc.lineno = 1;
}
/* stream object initializers. we do no allocation. */
@@ -744,7 +744,7 @@
goto open_file_err;
if(!wr)
s->readonly = 1;
- s->filename = MEM_STRDUP(fname);
+ s->loc.filename = MEM_STRDUP(fname);
return s;
open_file_err:
s->fd = -1;
@@ -756,7 +756,7 @@
{
_ios_init(s);
s->bm = bm_mem;
- s->filename = emptystr;
+ s->loc.filename = emptystr;
_buf_realloc(s, initsize);
return s;
}
@@ -801,17 +801,17 @@
{
ios_stdin = MEM_ALLOC(sizeof(ios_t));
ios_fd(ios_stdin, STDIN_FILENO, 0, 0);
- ios_stdin->filename = MEM_STRDUP("*stdin*");
+ ios_stdin->loc.filename = MEM_STRDUP("*stdin*");
ios_stdout = MEM_ALLOC(sizeof(ios_t));
ios_fd(ios_stdout, STDOUT_FILENO, 0, 0);
ios_stdout->bm = bm_line;
- ios_stdout->filename = MEM_STRDUP("*stdout*");
+ ios_stdout->loc.filename = MEM_STRDUP("*stdout*");
ios_stderr = MEM_ALLOC(sizeof(ios_t));
ios_fd(ios_stderr, STDERR_FILENO, 0, 0);
ios_stderr->bm = bm_none;
- ios_stderr->filename = MEM_STRDUP("*stderr*");
+ ios_stderr->loc.filename = MEM_STRDUP("*stderr*");
}
/* higher level interface */
@@ -831,22 +831,17 @@
return ios_write(s, &ch, 1);
}
-int
-ios_getc(ios_t *s)
+static void
+ios_loc(ios_t *s, uint8_t ch)
{
- uint8_t ch;
- if(s->state == bst_rd && s->bpos < s->size)
- ch = s->buf[s->bpos++];
- else if(s->_eof || ios_read(s, &ch, 1) < 1)
- return IOS_EOF;
if(ch == '\n'){
- s->lineno++;
- s->colno = 0;
+ s->loc.lineno++;
+ s->loc.colno = 0;
s->colnowait = 0;
}else if(s->colnowait > 0){
s->colnowait--;
}else{
- s->colno++;
+ s->loc.colno++;
if(ch & 0x80){
if((ch & 0xe0) == 0xc0)
s->colnowait = 1;
@@ -856,6 +851,17 @@
s->colnowait = 3;
}
}
+}
+
+int
+ios_getc(ios_t *s)
+{
+ uint8_t ch;
+ if(s->state == bst_rd && s->bpos < s->size)
+ ch = s->buf[s->bpos++]; // fast path: take the next byte straight from the read buffer
+ else if(s->_eof || ios_read(s, &ch, 1) < 1)
+ return IOS_EOF; // already at EOF or nothing could be read; s->loc is left untouched
+ ios_loc(s, ch); // advance s->loc (line/column) for the byte just consumed
return ch;
}
@@ -918,9 +924,9 @@
if(*r == Runeerror)
return 0;
if(*r == '\n')
- s->colno = 0;
+ s->loc.colno = 0;
else
- s->colno++;
+ s->loc.colno++;
return 1;
}
@@ -934,8 +940,10 @@
void
ios_purge(ios_t *s)
{
- if(s->state == bst_rd)
- s->bpos = s->size;
+ if(s->state == bst_rd){ // only act when the stream is in read state
+ for(; s->bpos < s->size; s->bpos++) // step over every unread buffered byte instead of jumping bpos to size
+ ios_loc(s, s->buf[s->bpos]); // so s->loc still accounts for the bytes being skipped
+ }
}
int
--- a/ios.h
+++ b/ios.h
@@ -18,13 +18,17 @@
#define IOS_BUFSIZE 32768
typedef struct {
+ char *filename; // stream name: emptystr or a MEM_STRDUP'd string (freed only when != emptystr)
+ uint32_t lineno; // current line; initialized to 1, incremented on each '\n'
+ uint32_t colno; // current column; reset to 0 on '\n', incremented once per character start
+}ios_loc_t;
+
+typedef struct {
uint8_t *buf; // start of buffer
size_t maxsize; // space allocated to buffer
size_t size; // length of valid data in buf, >=ndirty
size_t bpos; // current position in buffer
size_t ndirty; // # bytes at &buf[0] that need to be written
- size_t lineno;
- size_t colno;
off_t fpos; // cached file pos
bufmode_t bm;
int colnowait;
@@ -54,7 +58,7 @@
// request durable writes (fsync)
// uint8_t durable:1;
- char *filename;
+ ios_loc_t loc;
// todo: mutex
uint8_t local[IOS_INLSIZE];
--- a/iostream.c
+++ b/iostream.c
@@ -10,9 +10,9 @@
{
USED(v);
fl_print_str("#<io stream", f);
- if(*f->filename){
+ if(*f->loc.filename){
fl_print_chr(' ', f);
- fl_print_str(f->filename, f);
+ fl_print_str(f->loc.filename, f);
}
fl_print_chr('>', f);
}
@@ -351,19 +351,19 @@
BUILTIN("io-filename", io_filename)
{
argcount(nargs, 1);
- return cvalue_static_cstring(toiostream(args[0])->filename);
+ return cvalue_static_cstring(toiostream(args[0])->loc.filename); // the name is now stored in the stream's location record
}
BUILTIN("io-line", io_line)
{
argcount(nargs, 1);
- return size_wrap(toiostream(args[0])->lineno);
+ return size_wrap(toiostream(args[0])->loc.lineno); // current line number tracked in the stream's location record
}
BUILTIN("io-set-line!", io_set_line)
{
argcount(nargs, 2);
- toiostream(args[0])->lineno = tosize(args[1]);
+ toiostream(args[0])->loc.lineno = tosize(args[1]); // NOTE(review): lineno is uint32_t now; presumably tosize() yields size_t, so large values would be narrowed - confirm
return FL_t;
}
@@ -370,13 +370,13 @@
BUILTIN("io-column", io_column)
{
argcount(nargs, 1);
- return size_wrap(toiostream(args[0])->colno);
+ return size_wrap(toiostream(args[0])->loc.colno); // current column number tracked in the stream's location record
}
BUILTIN("io-set-column!", io_set_column)
{
argcount(nargs, 2);
- toiostream(args[0])->colno = tosize(args[1]);
+ toiostream(args[0])->loc.colno = tosize(args[1]); // NOTE(review): colno is uint32_t now; presumably tosize() yields size_t, so large values would be narrowed - confirm
return FL_t;
}
--- a/read.c
+++ b/read.c
@@ -10,11 +10,14 @@
TOK_OPENC, TOK_CLOSEC,
};
+#define PAtLoc "at %"PRIu32":%"PRIu32 // printf fragment "at LINE:COL"; consumes two uint32_t arguments (lineno, colno)
+
typedef struct Rctx Rctx;
struct Rctx {
uint32_t toktype;
value_t tokval;
+ ios_loc_t loc; // copy of RS->loc taken right after nextchar() when a token starts; handed to parse_error() as the error location
char buf[1024];
};
@@ -110,7 +113,7 @@
}
static _Noreturn void
-parse_error(const char *format, ...)
+parse_error(ios_loc_t *loc, const char *format, ...)
{
char msgbuf[512];
va_list args;
@@ -117,7 +120,7 @@
int n;
n = snprintf(msgbuf, sizeof(msgbuf), "%s:%"PRIu64":%"PRIu64": ",
- RS->filename, (uint64_t)RS->lineno, (uint64_t)RS->colno);
+ loc->filename, (uint64_t)loc->lineno, (uint64_t)loc->colno);
if(n >= (int)sizeof(msgbuf))
n = 0;
va_start(args, format);
@@ -133,7 +136,7 @@
{
ctx->buf[(*pi)++] = c;
if(*pi >= (int)(sizeof(ctx->buf)-1))
- parse_error("token too long");
+ parse_error(&ctx->loc, "token too long");
}
// return: 1 if escaped (forced to be symbol)
@@ -194,6 +197,7 @@
if(ctx->toktype != TOK_NONE)
return ctx->toktype;
c = nextchar();
+ ctx->loc = RS->loc;
if(ios_eof(RS))
return TOK_NONE;
if(c == '(')
@@ -217,7 +221,7 @@
else if(c == '#'){
ch = ios_getc(RS); c = (char)ch;
if(ch == IOS_EOF)
- parse_error("invalid read macro");
+ parse_error(&ctx->loc, "invalid read macro");
if(c == '.')
ctx->toktype = TOK_SHARPDOT;
else if(c == '\'')
@@ -225,12 +229,12 @@
else if(c == '\\'){
Rune cval;
if(ios_getutf8(RS, &cval) == IOS_EOF)
- parse_error("end of input in character constant");
+ parse_error(&ctx->loc, "end of input in character constant");
if(cval == 'u' || cval == 'U' || cval == 'x'){
read_token(ctx, 'u', 0);
if(ctx->buf[1] != '\0'){ // not a solitary 'u','U','x'
if(!fl_read_numtok(&ctx->buf[1], &ctx->tokval, 16))
- parse_error("invalid hex character constant");
+ parse_error(&ctx->loc, "invalid hex character constant");
cval = numval(ctx->tokval);
}
}else if(cval >= 'a' && cval <= 'z'){
@@ -250,7 +254,7 @@
else if(ctx->tokval == FL(spacesym)) cval = 0x20;
else if(ctx->tokval == FL(deletesym)) cval = 0x7F;
else
- parse_error("unknown character #\\%s", ctx->buf);
+ parse_error(&ctx->loc, "unknown character #\\%s", ctx->buf);
}
ctx->toktype = TOK_NUM;
ctx->tokval = mk_rune(cval);
@@ -257,7 +261,7 @@
}else if(c == '('){
ctx->toktype = TOK_SHARPOPEN;
}else if(c == '<'){
- parse_error("unreadable object");
+ parse_error(&ctx->loc, "unreadable object");
}else if(isdigit(c)){
read_token(ctx, c, 1);
c = (char)ios_getc(RS);
@@ -266,10 +270,10 @@
else if(c == '=')
ctx->toktype = TOK_LABEL;
else
- parse_error("invalid label");
+ parse_error(&ctx->loc, "invalid label");
x = strtoll(ctx->buf, &end, 10);
if(*end != '\0')
- parse_error("invalid label");
+ parse_error(&ctx->loc, "invalid label");
ctx->tokval = fixnum(x);
}else if(c == '!'){
// #! single line comment for shbang script support
@@ -284,7 +288,7 @@
ch = ios_getc(RS);
hashpipe_gotc:
if(ch == IOS_EOF)
- parse_error("eof within comment");
+ parse_error(&ctx->loc, "eof within comment");
if((char)ch == '|'){
ch = ios_getc(RS);
if((char)ch == '#'){
@@ -317,7 +321,7 @@
read_token(ctx, (char)ch, 0);
x = strtol(ctx->buf, &end, 10);
if(*end != '\0' || ctx->buf[0] == '\0')
- parse_error("invalid gensym label");
+ parse_error(&ctx->loc, "invalid gensym label");
ctx->toktype = TOK_GENSYM;
ctx->tokval = fixnum(x);
}else if(symchar(c)){
@@ -328,7 +332,7 @@
(c == 'd' && (base = 10)) ||
(c == 'x' && (base = 16))) && (isdigit_base(ctx->buf[1], base) || ctx->buf[1] == '-')){
if(!fl_read_numtok(&ctx->buf[1], &ctx->tokval, base))
- parse_error("invalid base %d constant", base);
+ parse_error(&ctx->loc, "invalid base %d constant", base);
return (ctx->toktype = TOK_NUM);
}
@@ -335,7 +339,7 @@
ctx->toktype = TOK_SHARPSYM;
ctx->tokval = symbol(ctx->buf, true);
}else{
- parse_error("unknown read macro");
+ parse_error(&ctx->loc, "unknown read macro");
}
}else if(c == ','){
ctx->toktype = TOK_COMMA;
@@ -396,7 +400,7 @@
ptrhash_put(&FL(readstate)->backrefs, (void*)label, (void*)v);
while(peek(ctx) != closer){
if(ios_eof(RS))
- parse_error("unexpected end of input");
+ parse_error(&ctx->loc, "unexpected end of input");
v = FL(stack)[FL(sp)-1]; // reload after possible alloc in peek()
if(i >= vector_size(v)){
v = FL(stack)[FL(sp)-1] = vector_grow(v, label != UNBOUND);
@@ -438,7 +442,7 @@
if(temp == nil){
if(buf == ctx->buf)
MEM_FREE(buf);
- parse_error("out of memory reading string");
+ parse_error(&ctx->loc, "out of memory reading string");
}
buf = temp;
}
@@ -446,7 +450,7 @@
if(c == IOS_EOF){
if(buf != ctx->buf)
MEM_FREE(buf);
- parse_error("unexpected end of input in string");
+ parse_error(&ctx->loc, "unexpected end of input in string");
}
if(c == '"')
break;
@@ -455,7 +459,7 @@
if(c == IOS_EOF){
if(buf != ctx->buf)
MEM_FREE(buf);
- parse_error("end of input in escape sequence");
+ parse_error(&ctx->loc, "end of input in escape sequence");
}
j = 0;
if(octal_digit(c)){
@@ -484,7 +488,7 @@
if(!j || r > Runemax){
if(buf != ctx->buf)
MEM_FREE(buf);
- parse_error("invalid escape sequence");
+ parse_error(&ctx->loc, "invalid escape sequence");
}
if(ndig == 2)
buf[i++] = (char)r;
@@ -497,7 +501,14 @@
if(esc == (char)c && !strchr("\\'\"`", esc)){
if(buf != ctx->buf)
MEM_FREE(buf);
- parse_error("invalid escape sequence: \\%c", (char)c);
+ ios_loc_t *l = &RS->loc;
+ parse_error(
+ &ctx->loc,
+ "invalid escape sequence \\%c "PAtLoc,
+ (char)c,
+ l->lineno,
+ l->colno
+ );
}
buf[i++] = esc;
}
@@ -520,16 +531,16 @@
{
value_t c, *pc;
uint32_t t;
- uint64_t lineno0, colno0;
+ ios_loc_t loc0;
- lineno0 = RS->lineno;
- colno0 = RS->colno - 1;
+ loc0 = RS->loc;
+ loc0.colno--;
PUSH(FL_nil);
pc = &FL(stack)[FL(sp)-1]; // to keep track of current cons cell
t = peek(ctx);
while(t != closer){
if(ios_eof(RS))
- parse_error("unexpected end of input: %"PRIu64":%"PRIu64" not closed", lineno0, colno0);
+ parse_error(&loc0, "not closed: unexpected EOI "PAtLoc, ctx->loc.lineno, ctx->loc.colno);
c = mk_cons(); car_(c) = cdr_(c) = FL_nil;
if(iscons(*pc))
cdr_(*pc) = c;
@@ -549,10 +560,11 @@
cdr_(*pc) = c;
t = peek(ctx);
if(ios_eof(RS))
- parse_error("unexpected end of input");
+ parse_error(&ctx->loc, "unexpected end of input");
if(t != closer){
take(ctx);
parse_error(
+ &ctx->loc,
"expected '%c'",
closer == TOK_CLOSEB ? ']' : (closer == TOK_CLOSEC ? '}' : ')')
);
@@ -623,9 +635,10 @@
return FL_f;
// constructor notation
c = nextchar();
+ ctx->loc = RS->loc;
if(c != '('){
take(ctx);
- parse_error("expected argument list for %s", symbol_name(ctx->tokval));
+ parse_error(&ctx->loc, "expected argument list for %s", symbol_name(ctx->tokval));
}
PUSH(FL_nil);
read_list(ctx, &FL(stack)[FL(sp)-1], UNBOUND, TOK_CLOSE);
@@ -658,7 +671,7 @@
case TOK_LABEL:
// create backreference label
if(ptrhash_has(&FL(readstate)->backrefs, (void*)ctx->tokval))
- parse_error("label %"PRIdPTR" redefined", numval(ctx->tokval));
+ parse_error(&ctx->loc, "label %"PRIdPTR" redefined", numval(ctx->tokval));
oldtokval = ctx->tokval;
v = do_read_sexpr(ctx, ctx->tokval);
ptrhash_put(&FL(readstate)->backrefs, (void*)oldtokval, (void*)v);
@@ -667,7 +680,7 @@
// look up backreference
v = (value_t)ptrhash_get(&FL(readstate)->backrefs, (void*)ctx->tokval);
if(v == (value_t)HT_NOTFOUND)
- parse_error("undefined label %"PRIdPTR, numval(ctx->tokval));
+ parse_error(&ctx->loc, "undefined label %"PRIdPTR, numval(ctx->tokval));
return v;
case TOK_GENSYM:
pv = (value_t*)ptrhash_bp(&FL(readstate)->gensyms, (void*)ctx->tokval);
@@ -677,13 +690,13 @@
case TOK_DOUBLEQUOTE:
return read_string(ctx);
case TOK_CLOSE:
- parse_error("unexpected ')'");
+ parse_error(&ctx->loc, "unexpected ')'");
case TOK_CLOSEB:
- parse_error("unexpected ']'");
+ parse_error(&ctx->loc, "unexpected ']'");
case TOK_CLOSEC:
- parse_error("unexpected '}'");
+ parse_error(&ctx->loc, "unexpected '}'");
case TOK_DOT:
- parse_error("unexpected '.'");
+ parse_error(&ctx->loc, "unexpected '.'");
}
return FL_unspecified;
}