shithub: femtolisp

Download patch

ref: 830e1c986ce5347fe7d80fc47ea7973aab0bfcb2
parent: 8e4ba69a7bfc6aa49f0b33ff098869204b1487e1
author: JeffBezanson <[email protected]>
date: Tue Dec 23 23:43:36 EST 2008

renaming 'char' type to 'byte' to avoid confusion
wchar will be used for all individual characters

adding string.find function

fixing bug in #sym(...) if sym was undefined


--- a/femtolisp/cvalues.c
+++ b/femtolisp/cvalues.c
@@ -9,7 +9,7 @@
 
 value_t int8sym, uint8sym, int16sym, uint16sym, int32sym, uint32sym;
 value_t int64sym, uint64sym;
-value_t longsym, ulongsym, charsym, wcharsym;
+value_t longsym, ulongsym, bytesym, wcharsym;
 value_t floatsym, doublesym;
 value_t gftypesym, stringtypesym, wcstringtypesym;
 value_t emptystringsym;
@@ -25,7 +25,7 @@
 static fltype_t *int64type, *uint64type;
 static fltype_t *longtype, *ulongtype;
 static fltype_t *floattype, *doubletype;
-       fltype_t *chartype, *wchartype;
+       fltype_t *bytetype, *wchartype;
        fltype_t *stringtype, *wcstringtype;
        fltype_t *builtintype;
 
@@ -231,11 +231,11 @@
 }
 */
 
-#define num_ctor(typenam, cnvt, tag)                                    \
+#define num_ctor(typenam, ctype, cnvt, tag)                             \
 static void cvalue_##typenam##_init(fltype_t *type, value_t arg,        \
                                     void *dest)                         \
 {                                                                       \
-    typenam##_t n=0;                                                    \
+    ctype##_t n=0;                                                      \
     (void)type;                                                         \
     if (isfixnum(arg)) {                                                \
         n = numval(arg);                                                \
@@ -244,7 +244,7 @@
         cvalue_t *cv = (cvalue_t*)ptr(arg);                             \
         void *p = cv_data(cv);                                          \
         if (valid_numtype(cv_numtype(cv)))                              \
-            n = (typenam##_t)conv_to_##cnvt(p, cv_numtype(cv));         \
+            n = (ctype##_t)conv_to_##cnvt(p, cv_numtype(cv));           \
         else                                                            \
             goto cnvt_error;                                            \
     }                                                                   \
@@ -251,7 +251,7 @@
     else {                                                              \
         goto cnvt_error;                                                \
     }                                                                   \
-    *((typenam##_t*)dest) = n;                                          \
+    *((ctype##_t*)dest) = n;                                            \
     return;                                                             \
  cnvt_error:                                                            \
     type_error(#typenam, "number", arg);                                \
@@ -259,37 +259,37 @@
 value_t cvalue_##typenam(value_t *args, u_int32_t nargs)                \
 {                                                                       \
     if (nargs==0) { PUSH(fixnum(0)); args = &Stack[SP-1]; }             \
-    value_t cv = cvalue(typenam##type, sizeof(typenam##_t));            \
+    value_t cv = cvalue(typenam##type, sizeof(ctype##_t));              \
     cvalue_##typenam##_init(typenam##type,                              \
                             args[0], &((cvalue_t*)ptr(cv))->_space[0]); \
     return cv;                                                          \
 }                                                                       \
-value_t mk_##typenam(typenam##_t n)                                     \
+value_t mk_##typenam(ctype##_t n)                                       \
 {                                                                       \
-    value_t cv = cvalue(typenam##type, sizeof(typenam##_t));            \
-    *(typenam##_t*)&((cvalue_t*)ptr(cv))->_space[0] = n;                \
+    value_t cv = cvalue(typenam##type, sizeof(ctype##_t));              \
+    *(ctype##_t*)&((cvalue_t*)ptr(cv))->_space[0] = n;                  \
     return cv;                                                          \
 }
 
-num_ctor(int8, int32, T_INT8)
-num_ctor(uint8, uint32, T_UINT8)
-num_ctor(int16, int32, T_INT16)
-num_ctor(uint16, uint32, T_UINT16)
-num_ctor(int32, int32, T_INT32)
-num_ctor(uint32, uint32, T_UINT32)
-num_ctor(int64, int64, T_INT64)
-num_ctor(uint64, uint64, T_UINT64)
-num_ctor(char, uint32, T_UINT8)
-num_ctor(wchar, int32, T_INT32)
+num_ctor(int8, int8, int32, T_INT8)
+num_ctor(uint8, uint8, uint32, T_UINT8)
+num_ctor(int16, int16, int32, T_INT16)
+num_ctor(uint16, uint16, uint32, T_UINT16)
+num_ctor(int32, int32, int32, T_INT32)
+num_ctor(uint32, uint32, uint32, T_UINT32)
+num_ctor(int64, int64, int64, T_INT64)
+num_ctor(uint64, uint64, uint64, T_UINT64)
+num_ctor(byte,  uint8, uint32, T_UINT8)
+num_ctor(wchar, int32, int32, T_INT32)
 #ifdef BITS64
-num_ctor(long, int64, T_INT64)
-num_ctor(ulong, uint64, T_UINT64)
+num_ctor(long, long, int64, T_INT64)
+num_ctor(ulong, ulong, uint64, T_UINT64)
 #else
-num_ctor(long, int32, T_INT32)
-num_ctor(ulong, uint32, T_UINT32)
+num_ctor(long, long, int32, T_INT32)
+num_ctor(ulong, ulong, uint32, T_UINT32)
 #endif
-num_ctor(float, double, T_FLOAT)
-num_ctor(double, double, T_DOUBLE)
+num_ctor(float, float, double, T_FLOAT)
+num_ctor(double, double, double, T_DOUBLE)
 
 value_t size_wrap(size_t sz)
 {
@@ -313,14 +313,6 @@
     return 0;
 }
 
-value_t char_from_code(uint32_t code)
-{
-    value_t ccode = fixnum(code);
-    if (code > 0x7f)
-        return cvalue_wchar(&ccode, 1);
-    return cvalue_char(&ccode, 1);
-}
-
 static void cvalue_enum_init(fltype_t *ft, value_t arg, void *dest)
 {
     int n=0;
@@ -457,7 +449,7 @@
 static value_t alloc_array(fltype_t *type, size_t sz)
 {
     value_t cv;
-    if (type->eltype == chartype) {
+    if (type->eltype == bytetype) {
         cv = cvalue_string(sz);
     }
     else {
@@ -556,7 +548,7 @@
 // *palign is an output argument giving the alignment required by type
 size_t ctype_sizeof(value_t type, int *palign)
 {
-    if (type == int8sym || type == uint8sym || type == charsym) {
+    if (type == int8sym || type == uint8sym || type == bytesym) {
         *palign = 1;
         return 1;
     }
@@ -672,7 +664,7 @@
 {
     if (type == int8sym)
         return T_INT8;
-    else if (type == uint8sym || type == charsym)
+    else if (type == uint8sym || type == bytesym)
         return T_UINT8;
     else if (type == int16sym)
         return T_INT16;
@@ -868,7 +860,7 @@
     ctor_cv_intern(uint32);
     ctor_cv_intern(int64);
     ctor_cv_intern(uint64);
-    ctor_cv_intern(char);
+    ctor_cv_intern(byte);
     ctor_cv_intern(wchar);
     ctor_cv_intern(long);
     ctor_cv_intern(ulong);
@@ -890,7 +882,7 @@
     // todo: autorelease
 
     stringtypesym = symbol("*string-type*");
-    setc(stringtypesym, list2(arraysym, charsym));
+    setc(stringtypesym, list2(arraysym, bytesym));
 
     wcstringtypesym = symbol("*wcstring-type*");
     setc(wcstringtypesym, list2(arraysym, wcharsym));
--- a/femtolisp/flisp.c
+++ b/femtolisp/flisp.c
@@ -904,7 +904,7 @@
                     v = size_wrap(cvalue_arraylen(Stack[SP-1]));
                     break;
                 }
-                else if (v == charsym) {
+                else if (v == bytesym) {
                     v = fixnum(1);
                     break;
                 }
--- a/femtolisp/flisp.h
+++ b/femtolisp/flisp.h
@@ -212,7 +212,7 @@
 #define cv_type(cv)    (cv_class(cv)->type)
 #define cv_data(cv)    ((cv)->data)
 #define cv_numtype(cv) (cv_class(cv)->numtype)
-#define cv_isstr(cv)   (cv_class(cv)->eltype == chartype)
+#define cv_isstr(cv)   (cv_class(cv)->eltype == bytetype)
 
 #define cvalue_data(v) cv_data((cvalue_t*)ptr(v))
 
@@ -232,11 +232,11 @@
 
 extern value_t int8sym, uint8sym, int16sym, uint16sym, int32sym, uint32sym;
 extern value_t int64sym, uint64sym;
-extern value_t longsym, ulongsym, charsym, ucharsym, wcharsym;
+extern value_t longsym, ulongsym, bytesym, wcharsym;
 extern value_t structsym, arraysym, enumsym, cfunctionsym, voidsym, pointersym;
 extern value_t stringtypesym, wcstringtypesym, emptystringsym;
 extern value_t unionsym, floatsym, doublesym, builtinsym;
-extern fltype_t *chartype, *wchartype;
+extern fltype_t *bytetype, *wchartype;
 extern fltype_t *stringtype, *wcstringtype;
 extern fltype_t *builtintype;
 
@@ -266,9 +266,9 @@
 value_t mk_float(float_t n);
 value_t mk_uint32(uint32_t n);
 value_t mk_uint64(uint64_t n);
+value_t mk_wchar(int32_t n);
 value_t return_from_uint64(uint64_t Uaccum);
 value_t return_from_int64(int64_t Saccum);
-value_t char_from_code(uint32_t code);
 
 typedef struct {
     char *name;
@@ -279,7 +279,7 @@
 
 /* builtins */
 value_t fl_hash(value_t *args, u_int32_t nargs);
-value_t cvalue_char(value_t *args, uint32_t nargs);
+value_t cvalue_byte(value_t *args, uint32_t nargs);
 value_t cvalue_wchar(value_t *args, uint32_t nargs);
 
 #endif
--- a/femtolisp/print.c
+++ b/femtolisp/print.c
@@ -411,39 +411,19 @@
 {
     int64_t tmp=0;
 
-    if (type == charsym) {
-        // print chars as characters when possible
+    if (type == bytesym) {
         unsigned char ch = *(unsigned char*)data;
         if (princ)
             outc(ch, f);
         else if (weak)
-            HPOS+=ios_printf(f, "%hhu", ch);
-        else if (isprint(ch))
-            HPOS+=ios_printf(f, "#\\%c", ch);
+            HPOS+=ios_printf(f, "0x%hhx", ch);
         else
-            HPOS+=ios_printf(f, "#char(%hhu)", ch);
+            HPOS+=ios_printf(f, "#byte(0x%hhx)", ch);
     }
-    /*
-    else if (type == ucharsym) {
-        uchar ch = *(uchar*)data;
-        if (princ)
-            outc(ch, f);
-        else {
-            if (!weak)
-                ios_printf(f, "#uchar(");
-            ios_printf(f, "%hhu", ch);
-            if (!weak)
-                outs(")", f);
-        }
-    }
-    */
     else if (type == wcharsym) {
         uint32_t wc = *(uint32_t*)data;
         char seq[8];
-        if (weak)
-            HPOS+=ios_printf(f, "%d", (int)wc);
-        else if (princ || (iswprint(wc) && wc>0x7f)) {
-            // reader only reads #\c syntax as wchar if the code is >0x7f
+        if (princ || iswprint(wc)) {
             size_t nb = u8_toutf8(seq, sizeof(seq), &wc, 1);
             seq[nb] = '\0';
             // TODO: better multibyte handling
@@ -450,6 +430,9 @@
             if (!princ) outs("#\\", f);
             outs(seq, f);
         }
+        else if (weak) {
+            HPOS+=ios_printf(f, "%d", (int)wc);
+        }
         else {
             HPOS+=ios_printf(f, "#%s(%d)", symbol_name(type), (int)wc);
         }
@@ -544,7 +527,7 @@
                 elsize = ctype_sizeof(eltype, &junk);
                 cnt = elsize ? len/elsize : 0;
             }
-            if (eltype == charsym) {
+            if (eltype == bytesym) {
                 if (princ) {
                     ios_write(f, data, len);
                 }
--- a/femtolisp/read.c
+++ b/femtolisp/read.c
@@ -189,13 +189,7 @@
             if (ios_getutf8(f, &cval) == IOS_EOF)
                 lerror(ParseError, "read: end of input in character constant");
             toktype = TOK_NUM;
-            tokval = fixnum(cval);
-            if (cval > 0x7f) {
-                tokval = cvalue_wchar(&tokval, 1);
-            }
-            else {
-                tokval = cvalue_char(&tokval, 1);
-            }
+            tokval = mk_wchar(cval);
         }
         else if ((char)ch == '(') {
             toktype = TOK_SHARPOPEN;
@@ -501,7 +495,7 @@
         PUSH(NIL);
         read_list(f, &Stack[SP-1], UNBOUND);
         v = POP();
-        return apply(symbol_value(sym), v);
+        return apply(toplevel_eval(sym), v);
     case TOK_OPENB:
         return read_vector(f, label, TOK_CLOSEB);
     case TOK_SHARPOPEN:
--- a/femtolisp/string.c
+++ b/femtolisp/string.c
@@ -109,7 +109,7 @@
         else if (iscvalue(args[i])) {
             temp = (cvalue_t*)ptr(args[i]);
             t = cv_type(temp);
-            if (t == charsym) {
+            if (t == bytesym) {
                 sz++;
                 continue;
             }
@@ -136,7 +136,7 @@
             temp = (cvalue_t*)ptr(args[i]);
             t = cv_type(temp);
             data = cvalue_data(args[i]);
-            if (t == charsym) {
+            if (t == bytesym) {
                 *ptr++ = *(char*)data;
             }
             else if (t == wcharsym) {
@@ -225,9 +225,61 @@
     size_t sl = u8_seqlen(&s[i]);
     if (sl > len || i > len-sl)
         bounds_error("string.char", args[0], args[1]);
-    return char_from_code(u8_nextchar(s, &i));
+    return mk_wchar(u8_nextchar(s, &i));
 }
 
+static value_t mem_find_byte(char *s, char c, size_t start, size_t len)
+{
+    char *p = memchr(s+start, c, len-start);
+    if (p == NULL)
+        return NIL;
+    return size_wrap((size_t)(p - s));
+}
+
+value_t fl_string_find(value_t *args, u_int32_t nargs)
+{
+    char cbuf[8];
+    size_t start = 0;
+    if (nargs == 3)
+        start = toulong(args[2], "string.find");
+    else
+        argcount("string.find", nargs, 2);
+    char *s = tostring(args[0], "string.find");
+    size_t len = cv_len((cvalue_t*)ptr(args[0]));
+    if (start > len)
+        bounds_error("string.find", args[0], args[2]);
+    char *needle=NULL; size_t needlesz=0;
+    if (!iscvalue(args[1]))
+        type_error("string.find", "string", args[1]);
+    cvalue_t *cv = (cvalue_t*)ptr(args[1]);
+    if (isstring(args[1])) {
+        needlesz = cv_len(cv);
+        needle = (char*)cv_data(cv);
+    }
+    else if (cv_class(cv) == wchartype) {
+        uint32_t c = *(uint32_t*)cv_data(cv);
+        if (c <= 0x7f)
+            return mem_find_byte(s, (char)c, start, len);
+        needlesz = u8_toutf8(cbuf, sizeof(cbuf), &c, 1);
+        needle = cbuf;
+    }
+    else if (cv_class(cv) == bytetype) {
+        return mem_find_byte(s, *(char*)cv_data(cv), start, len);
+    }
+    if (needlesz == 0)
+        return fixnum(start);
+    if (needlesz > len-start)
+        return NIL;
+    size_t i;
+    for(i=start; i < len; i++) {
+        if (s[i] == needle[0]) {
+            if (!memcmp(&s[i], needle, needlesz))
+                return size_wrap(i);
+        }
+    }
+    return NIL;
+}
+
 value_t fl_string_inc(value_t *args, u_int32_t nargs)
 {
     if (nargs < 2 || nargs > 3)
@@ -274,6 +326,7 @@
     { "string.length", fl_string_length },
     { "string.split", fl_string_split },
     { "string.sub", fl_string_sub },
+    { "string.find", fl_string_find },
     { "string.char", fl_string_char },
     { "string.inc", fl_string_inc },
     { "string.dec", fl_string_dec },
--- a/femtolisp/system.lsp
+++ b/femtolisp/system.lsp
@@ -87,8 +87,8 @@
 
 (define (cadr x) (car (cdr x)))
 
-;(setq *special-forms* '(quote cond if and or while lambda label trycatch
-;                        %top progn))
+;(setq *special-forms* '(quote cond if and or while lambda trycatch
+;                        setq progn))
 
 (defun macroexpand (e)
   ((label mexpand
@@ -101,8 +101,7 @@
               (cond ((and (consp e)
                           (not (eq (car e) 'quote)))
                      (let ((newenv
-                            (if (and (or (eq (car e) 'lambda)
-                                         (eq (car e) 'label))
+                            (if (and (eq (car e) 'lambda)
                                      (consp (cdr e)))
                                 (append.2 (cadr e) env)
                               env)))
--- a/femtolisp/todo
+++ b/femtolisp/todo
@@ -818,7 +818,8 @@
 *string.split   - (string.split s sep-chars)
  string.trim    - (string.trim s chars-at-start chars-at-end)
 *string.reverse
- string.find    - (string.find s str|char), or nil if not found
+*string.find    - (string.find s str|char [offs]), or nil if not found
+ string.rfind
  string.map     - (string.map f s)
 *string.encode  - to utf8
 *string.decode  - from utf8 to UCS
--- a/femtolisp/types.c
+++ b/femtolisp/types.c
@@ -114,7 +114,7 @@
     mk_primtype(uint64);
     mk_primtype(long);
     mk_primtype(ulong);
-    mk_primtype(char);
+    mk_primtype(byte);
     mk_primtype(wchar);
     mk_primtype(float);
     mk_primtype(double);
--- a/femtolisp/unittest.lsp
+++ b/femtolisp/unittest.lsp
@@ -64,7 +64,7 @@
 
 (assert (equal (uint64 (double -123)) #uint64(0xffffffffffffff85)))
 
-(assert (equal (string 'sym #char(65) #wchar(945) "blah") "symA\u03B1blah"))
+(assert (equal (string 'sym #byte(65) #wchar(945) "blah") "symA\u03B1blah"))
 
 ; this crashed once
 (for 1 10 (lambda (i) 0))