shithub: femtolisp

Download patch

ref: 06df0a14302c5f1e5c9bd52078711453e28dc681
parent: bcc3d3e54ac5538e30c01b4407b987ac4e4c0b13
author: Sigrid Solveig Haflínudóttir <[email protected]>
date: Mon Mar 27 11:39:17 EDT 2023

Import the "error on invalid UTF-8 in source files" patch by Jeff Bezanson from Julia.

--- a/iostream.c
+++ b/iostream.c
@@ -124,9 +124,12 @@
     argcount("io.getc", nargs, 1);
     ios_t *s = toiostream(args[0], "io.getc");
     uint32_t wc;
-    if (ios_getutf8(s, &wc) == IOS_EOF)
+    int res;
+    if ((res = ios_getutf8(s, &wc)) == IOS_EOF)
         //lerrorf(IOError, "io.getc: end of file reached");
         return FL_EOF;
+    if (res == 0)
+        lerrorf(IOError, "io.getc: invalid UTF-8 sequence");
     return mk_wchar(wc);
 }
 
@@ -135,8 +138,11 @@
     argcount("io.peekc", nargs, 1);
     ios_t *s = toiostream(args[0], "io.peekc");
     uint32_t wc;
-    if (ios_peekutf8(s, &wc) == IOS_EOF)
+    int res;
+    if ((res = ios_peekutf8(s, &wc)) == IOS_EOF)
         return FL_EOF;
+    if (res == 0)
+        lerrorf(IOError, "io.peekc: invalid UTF-8 sequence");
     return mk_wchar(wc);
 }
 
--- a/llt/ios.c
+++ b/llt/ios.c
@@ -872,15 +872,20 @@
         return 1;
     }
     sz = u8_seqlen(&c0)-1;
+    if (!isutf(c0) || sz > 3)
+        return 0;
     if (ios_readprep(s, sz) < sz) {
         // NOTE: this returns EOF even though some bytes are available
         // so we do not set s->_eof on this code path
         return IOS_EOF;
     }
-    size_t i = s->bpos;
-    *pwc = u8_nextchar(s->buf, &i);
-    ios_read(s, buf, sz+1);
-    return 1;
+    if (u8_isvalid(&s->buf[s->bpos], sz+1)) {
+        size_t i = s->bpos;
+        *pwc = u8_nextchar(s->buf, &i);
+        ios_read(s, buf, sz+1);
+        return 1;
+    }
+    return 0;
 }
 
 int ios_peekutf8(ios_t *s, uint32_t *pwc)
@@ -898,11 +903,16 @@
         return 1;
     }
     sz = u8_seqlen(&c0)-1;
+    if (!isutf(c0) || sz > 3)
+        return 0;
     if (ios_readprep(s, sz) < sz)
         return IOS_EOF;
-    size_t i = s->bpos;
-    *pwc = u8_nextchar(s->buf, &i);
-    return 1;
+    if (u8_isvalid(&s->buf[s->bpos], sz+1)) {
+        size_t i = s->bpos;
+        *pwc = u8_nextchar(s->buf, &i);
+        return 1;
+    }
+    return 0;
 }
 
 int ios_pututf8(ios_t *s, uint32_t wc)