ref: 06df0a14302c5f1e5c9bd52078711453e28dc681
parent: bcc3d3e54ac5538e30c01b4407b987ac4e4c0b13
author: Sigrid Solveig Haflínudóttir <[email protected]>
date: Mon Mar 27 11:39:17 EDT 2023
import "error on invalid UTF-8 in source files" patch by Jeff Bezanson from Julia
--- a/iostream.c
+++ b/iostream.c
@@ -124,9 +124,12 @@
argcount("io.getc", nargs, 1);
ios_t *s = toiostream(args[0], "io.getc");
uint32_t wc;
- if (ios_getutf8(s, &wc) == IOS_EOF)
+ int res;
+ if ((res = ios_getutf8(s, &wc)) == IOS_EOF)
//lerrorf(IOError, "io.getc: end of file reached");
return FL_EOF;
+ if (res == 0)
+ lerrorf(IOError, "io.getc: invalid UTF-8 sequence");
return mk_wchar(wc);
}
@@ -135,8 +138,11 @@
argcount("io.peekc", nargs, 1);
ios_t *s = toiostream(args[0], "io.peekc");
uint32_t wc;
- if (ios_peekutf8(s, &wc) == IOS_EOF)
+ int res;
+ if ((res = ios_peekutf8(s, &wc)) == IOS_EOF)
return FL_EOF;
+ if (res == 0)
+ lerrorf(IOError, "io.peekc: invalid UTF-8 sequence");
return mk_wchar(wc);
}
--- a/llt/ios.c
+++ b/llt/ios.c
@@ -872,15 +872,20 @@
return 1;
}
sz = u8_seqlen(&c0)-1;
+ if (!isutf(c0) || sz > 3)
+ return 0;
if (ios_readprep(s, sz) < sz) {
// NOTE: this returns EOF even though some bytes are available
// so we do not set s->_eof on this code path
return IOS_EOF;
}
- size_t i = s->bpos;
- *pwc = u8_nextchar(s->buf, &i);
- ios_read(s, buf, sz+1);
- return 1;
+ if (u8_isvalid(&s->buf[s->bpos], sz+1)) {
+ size_t i = s->bpos;
+ *pwc = u8_nextchar(s->buf, &i);
+ ios_read(s, buf, sz+1);
+ return 1;
+ }
+ return 0;
}
int ios_peekutf8(ios_t *s, uint32_t *pwc)
@@ -898,11 +903,16 @@
return 1;
}
sz = u8_seqlen(&c0)-1;
+ if (!isutf(c0) || sz > 3)
+ return 0;
if (ios_readprep(s, sz) < sz)
return IOS_EOF;
- size_t i = s->bpos;
- *pwc = u8_nextchar(s->buf, &i);
- return 1;
+ if (u8_isvalid(&s->buf[s->bpos], sz+1)) {
+ size_t i = s->bpos;
+ *pwc = u8_nextchar(s->buf, &i);
+ return 1;
+ }
+ return 0;
}
int ios_pututf8(ios_t *s, uint32_t wc)