ref: fb7dd4b3a868cb8987049c95bb32e6425a73c8b9
dir: /appl/lib/sexprs.b/
implement Sexprs; # # full SDSI/SPKI S-expression reader # # Copyright © 2003-2004 Vita Nuova Holdings Limited # include "sys.m"; sys: Sys; include "encoding.m"; base64: Encoding; base16: Encoding; include "bufio.m"; bufio: Bufio; Iobuf: import bufio; include "sexprs.m"; Maxtoken: con 1024*1024; # should be more than enough Syntax: exception(string, big); Here: con big -1; Rd: adt[T] for { T => getb: fn(nil: self T): int; ungetb: fn(nil: self T): int; offset: fn(nil: self T): big; } { t: T; parseitem: fn(rd: self ref Rd[T]): ref Sexp raises (Syntax); ws: fn(rd: self ref Rd[T]): int; simplestring: fn(rd: self ref Rd[T], c: int, hint: string): ref Sexp raises (Syntax); toclosing: fn(rd: self ref Rd[T], c: int): string raises (Syntax); unquote: fn(rd: self ref Rd[T]): string raises (Syntax); }; init() { sys = load Sys Sys->PATH; base64 = load Encoding Encoding->BASE64PATH; base16 = load Encoding Encoding->BASE16PATH; bufio = load Bufio Bufio->PATH; bufio->sopen(""); } Sexp.read[T](t: T): (ref Sexp, string) for { T => getb: fn(nil: self T): int; ungetb: fn(nil: self T): int; offset: fn(nil: self T): big; } { { rd := ref Rd[T](t); e := rd.parseitem(); return (e, nil); }exception e { Syntax => (diag, pos) := e; if(pos < big 0) pos += t.offset(); return (nil, sys->sprint("%s at offset %bd", diag, pos)); } } Sexp.parse(s: string): (ref Sexp, string, string) { f := bufio->sopen(s); (e, diag) := Sexp.read(f); pos := int f.offset(); return (e, s[pos:], diag); } Sexp.unpack(a: array of byte): (ref Sexp, array of byte, string) { f := bufio->aopen(a); (e, diag) := Sexp.read(f); pos := int f.offset(); return (e, a[pos:], diag); } Rd[T].parseitem(rd: self ref Rd[T]): ref Sexp raises (Syntax) { p0 := rd.t.offset(); { c := rd.ws(); if(c < 0) return nil; case c { '{' => a := rd.toclosing('}'); f := bufio->aopen(base64->dec(a)); ht: type Rd[ref Iobuf]; nr := ref ht(f); return nr.parseitem(); '(' => lists: list of ref Sexp; while((c = rd.ws()) != ')'){ if(c < 0) raise Syntax("unclosed '('", p0); rd.t.ungetb(); e := rd.parseitem(); # we'll catch missing ) at top of loop lists = e :: lists; } rl := lists; lists = nil; for(; rl != nil; rl = tl rl) lists = hd rl :: lists; return ref Sexp.List(lists); '[' => # display hint e := rd.simplestring(rd.t.getb(), nil); c = rd.ws(); if(c != ']'){ if(c >= 0) rd.t.ungetb(); raise Syntax("missing ] in display hint", p0); } pick r := e { String => return rd.simplestring(rd.ws(), r.s); * => raise Syntax("illegal display hint", Here); } * => return rd.simplestring(c, nil); } }exception{ Syntax => raise; } } # skip white space Rd[T].ws(rd: self ref Rd[T]): int { while(isspace(c := rd.t.getb())) {} return c; } isspace(c: int): int { return c == ' ' || c == '\r' || c == '\t' || c == '\n'; } Rd[T].simplestring(rd: self ref Rd[T], c: int, hint: string): ref Sexp raises (Syntax) { dec := -1; decs: string; if(c >= '0' && c <= '9'){ for(dec = 0; c >= '0' && c <= '9'; c = rd.t.getb()){ dec = dec*10 + c-'0'; decs[len decs] = c; } if(dec < 0 || dec > Maxtoken) raise Syntax("implausible token length", Here); } { case c { '"' => text := rd.unquote(); return ref Sexp.String(text, hint); '|' => return sform(base64->dec(rd.toclosing(c)), hint); '#' => return sform(base16->dec(rd.toclosing(c)), hint); * => if(c == ':' && dec >= 0){ # raw bytes a := array[dec] of byte; for(i := 0; i < dec; i++){ c = rd.t.getb(); if(c < 0) raise Syntax("missing bytes in raw token", Here); a[i] = byte c; } return sform(a, hint); } #s := decs; if(decs != nil) raise Syntax("token can't start with a digit", Here); s: string; # <token> by definition is always printable; never utf-8 while(istokenc(c)){ s[len s] = c; c = rd.t.getb(); } if(s == nil) raise Syntax("missing token", Here); # consume c to ensure progress on error if(c >= 0) rd.t.ungetb(); return ref Sexp.String(s, hint); } }exception{ Syntax => raise; } } sform(a: array of byte, hint: string): ref Sexp { if(istextual(a)) return ref Sexp.String(string a, hint); return ref Sexp.Binary(a, hint); } Rd[T].toclosing(rd: self ref Rd[T], end: int): string raises (Syntax) { s: string; p0 := rd.t.offset(); while((c := rd.t.getb()) != end){ if(c < 0) raise Syntax(sys->sprint("missing closing '%c'", end), p0); s[len s] = c; } return s; } hex(c: int): int { if(c >= '0' && c <= '9') return c-'0'; if(c >= 'a' && c <= 'f') return 10+(c-'a'); if(c >= 'A' && c <= 'F') return 10+(c-'A'); return -1; } Rd[T].unquote(rd: self ref Rd[T]): string raises (Syntax) { os: string; p0 := rd.t.offset(); while((c := rd.t.getb()) != '"'){ if(c < 0) raise Syntax("unclosed quoted string", p0); if(c == '\\'){ e0 := rd.t.offset(); c = rd.t.getb(); if(c < 0) break; case c { '\r' => c = rd.t.getb(); if(c != '\n') rd.t.ungetb(); continue; '\n' => c = rd.t.getb(); if(c != '\r') rd.t.ungetb(); continue; 'b' => c = '\b'; 'f' => c = '\f'; 'n' => c = '\n'; 'r' => c = '\r'; 't' => c = '\t'; 'v' => c = '\v'; '0' to '7' => oct := 0; for(i := 0;;){ if(!(c >= '0' && c <= '7')) raise Syntax("illegal octal escape", e0); oct = (oct<<3) | (c-'0'); if(++i == 3) break; c = rd.t.getb(); } c = oct & 16rFF; 'x' => c0 := hex(rd.t.getb()); c1 := hex(rd.t.getb()); if(c0 < 0 || c1 < 0) raise Syntax("illegal hex escape", e0); c = (c0<<4) | c1; * => ; # as-is } } os[len os] = c; } return os; } hintlen(s: string): int { if(s == nil) return 0; n := len array of byte s; return len sys->aprint("[%d:]", n) + n; } Sexp.packedsize(e: self ref Sexp): int { if(e == nil) return 0; pick r := e{ String => n := len array of byte r.s; return hintlen(r.hint) + len sys->aprint("%d:", n) + n; Binary => n := len r.data; return hintlen(r.hint) + len sys->aprint("%d:", n) + n; List => n := 1; # '(' for(l := r.l; l != nil; l = tl l) n += (hd l).packedsize(); return n+1; # + ')' } } packbytes(a: array of byte, b: array of byte): array of byte { n := len b; c := sys->aprint("%d:", n); a[0:] = c; a[len c:] = b; return a[len c+n:]; } packhint(a: array of byte, s: string): array of byte { if(s == nil) return a; a[0] = byte '['; a = packbytes(a[1:], array of byte s); a[0] = byte ']'; return a[1:]; } pack(e: ref Sexp, a: array of byte): array of byte { if(e == nil) return array[0] of byte; pick r := e{ String => if(r.hint != nil) a = packhint(a, r.hint); return packbytes(a, array of byte r.s); Binary => if(r.hint != nil) a = packhint(a, r.hint); return packbytes(a, r.data); List => a[0] = byte '('; a = a[1:]; for(l := r.l; l != nil; l = tl l) a = pack(hd l, a); a[0] = byte ')'; return a[1:]; } } Sexp.pack(e: self ref Sexp): array of byte { a := array[e.packedsize()] of byte; pack(e, a); return a; } Sexp.b64text(e: self ref Sexp): string { return "{" + base64->enc(e.pack()) + "}"; } Sexp.text(e: self ref Sexp): string { if(e == nil) return ""; pick r := e{ String => s := quote(r.s); if(r.hint == nil) return s; return "["+quote(r.hint)+"]"+s; Binary => h := r.hint; if(h != nil) h = "["+quote(h)+"]"; if(len r.data <= 4) return sys->sprint("%s#%s#", h, base16->enc(r.data)); return sys->sprint("%s|%s|", h, base64->enc(r.data)); List => s := "("; for(l := r.l; l != nil; l = tl l){ s += (hd l).text(); if(tl l != nil) s += " "; } return s+")"; } } #An octet string that meets the following conditions may be given #directly as a "token". # # -- it does not begin with a digit # # -- it contains only characters that are # -- alphabetic (upper or lower case), # -- numeric, or # -- one of the eight "pseudo-alphabetic" punctuation marks: # - . / _ : * + = # (Note: upper and lower case are not equivalent.) # (Note: A token may begin with punctuation, including ":"). istokenc(c: int): int { return c >= '0' && c <= '9' || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '-' || c == '.' || c == '/' || c == '_' || c == ':' || c == '*' || c == '+' || c == '='; } istoken(s: string): int { if(s == nil) return 0; for(i := 0; i < len s; i++) case s[i] { '0' to '9' => if(i == 0) return 0; 'a' to 'z' or 'A' to 'Z' or '-' or '.' or '/' or '_' or ':' or '*' or '+' or '=' => break; * => return 0; } return 1; } # should the data qualify as binary or text? # the if(0) version accepts valid Unicode sequences # could use [display] to control character set? istextual(a: array of byte): int { for(i := 0; i < len a;){ if(0){ (c, n, ok) := sys->byte2char(a, i); if(!ok || c < ' ' && !isspace(c) || c >= 16r7F) return 0; i += n; }else{ c := int a[i++]; if(c < ' ' && !isspace(c) || c >= 16r7F) return 0; } } return 1; } esc(c: int): string { case c { '"' => return "\\\""; '\\' => return "\\\\"; '\b' => return "\\b"; '\f' => return "\\f"; '\n' => return "\\n"; '\t' => return "\\t"; '\r' => return "\\r"; '\v' => return "\\v"; * => if(c < ' ' || c >= 16r7F) return sys->sprint("\\x%.2ux", c & 16rFF); } return nil; } quote(s: string): string { if(istoken(s)) return s; for(i := 0; i < len s; i++) if((v := esc(s[i])) != nil){ os := "\"" + s[0:i] + v; while(++i < len s){ if((v = esc(s[i])) != nil) os += v; else os[len os] = s[i]; } os[len os] = '"'; return os; } return "\""+s+"\""; } # # other S expression operations # Sexp.islist(e: self ref Sexp): int { return e != nil && tagof e == tagof Sexp.List; } Sexp.els(e: self ref Sexp): list of ref Sexp { if(e == nil) return nil; pick s := e { List => return s.l; * => return nil; } } Sexp.op(e: self ref Sexp): string { if(e == nil) return nil; pick s := e { String => return s.s; Binary => return nil; List => if(s.l == nil) return nil; pick t := hd s.l { String => return t.s; * => return nil; } } return nil; } Sexp.args(e: self ref Sexp): list of ref Sexp { if((l := e.els()) != nil) return tl l; return nil; } Sexp.asdata(e: self ref Sexp): array of byte { if(e == nil) return nil; pick s := e { List => return nil; String => return array of byte s.s; Binary => return s.data; } } Sexp.astext(e: self ref Sexp): string { if(e == nil) return nil; pick s := e { List => return nil; String => return s.s; Binary => return string s.data; # questionable; should possibly treat it as latin-1 } } Sexp.eq(e1: self ref Sexp, e2: ref Sexp): int { if(e1 == e2) return 1; if(e1 == nil || e2 == nil || tagof e1 != tagof e2) return 0; pick s1 := e1 { List => pick s2 := e2 { List => l1 := s1.l; l2 := s2.l; for(; l1 != nil; l1 = tl l1){ if(l2 == nil || !(hd l1).eq(hd l2)) return 0; l2 = tl l2; } return l2 == nil; } String => pick s2 := e2 { String => return s1.s == s2.s && s1.hint == s2.hint; } Binary => pick s2 := e2 { Binary => if(len s1.data != len s2.data || s1.hint != s2.hint) return 0; for(i := 0; i < len s1.data; i++) if(s1.data[i] != s2.data[i]) return 0; return 1; } } return 0; } Sexp.copy(e: self ref Sexp): ref Sexp { if(e == nil) return nil; pick r := e { List => rl: list of ref Sexp; for(l := r.l; l != nil; l = tl l) rl = (hd l).copy() :: rl; for(l = nil; rl != nil; rl = tl rl) l = hd rl :: l; return ref Sexp.List(l); String => return ref *r; # safe because .s and .hint are strings, immutable Binary => b: array of byte; if((a := r.data) != nil){ b = array[len a] of byte; b[0:] = a; } return ref Sexp.Binary(b, r.hint); } }