ref: 2b23d05d57743af57385cd42c0fd2d223b11d8c8
dir: /scan.c/
#include <u.h>
#include <libc.h>
#include <thread.h>

#include "dat.h"
#include "fns.h"

/*
 * Append one token carrying `tag` to `tokens` and return a pointer to it.
 *
 * The token array is re-obtained from allocextra() on every call, so a
 * pointer returned earlier should be treated as stale once newtok() is
 * called again on the same list.  Only the tag is set here; the caller
 * fills in any payload field (num, name, string, prim, ...).
 */
Token *
newtok(TokenList *tokens, int tag)
{
	Token *tok;

	tokens->count++;
	tokens->tokens = allocextra(tokens, sizeof(Token) * tokens->count);
	tok = tokens->tokens + (tokens->count - 1);
	tok->tag = tag;
	return tok;
}

/*
 * Split the NUL-terminated UTF-8 text in `buf` into a TokenList.
 *
 * Recognised, in order of precedence:
 *   - single-rune punctuation: ( ) [ ] { } newline ⋄ ∇ ← ;
 *   - primitives, as reported by primid()
 *   - white space (skipped)
 *   - decimal integers made of ASCII digits
 *   - names: an alphabetic rune followed by alphabetic/digit runes
 *   - strings delimited by single quotes
 * Anything else is reported through error(ESyntax, ...).  The list is
 * always terminated by a TokEnd token.
 *
 * Name and string payloads are allocated with malloc(); running out of
 * memory is fatal.
 */
TokenList *
scan(char *buf)
{
	Rune r;
	int n, id, tag;
	usize size;
	vlong num;
	char *cp, *start, *rest;
	Token *tok;
	TokenList *tokens;

	tokens = alloc(DataTokenList);
	cp = buf;

	while(*cp){
		/* n is the byte length of the construct at cp; `next` advances by it */
		n = chartorune(&r, cp);

		tag = -1;
		switch(r){
		case L'(':	tag = TokLparen;	break;
		case L')':	tag = TokRparen;	break;
		case L'[':	tag = TokLbrack;	break;
		case L']':	tag = TokRbrack;	break;
		case L'{':	tag = TokLbrace;	break;
		case L'}':	tag = TokRbrace;	break;
		case L'\n':	tag = TokNewline;	break;
		case L'⋄':	tag = TokDiamond;	break;
		case L'∇':	tag = TokDel;		break;
		case L'←':	tag = TokLarrow;	break;
		case L';':	tag = TokSemi;		break;
		}
		if(tag != -1){
			newtok(tokens, tag);
			goto next;
		}

		if((id = primid(cp)) != -1){
			/* a primitive's spelling may be longer than one rune */
			n = strlen(primsymb(id));
			tok = newtok(tokens, TokPrimitive);
			tok->prim = id;
			tok->nameclass = primclass(id);
			goto next;
		}

		if(isspacerune(r))
			goto next;

		/*
		 * Only ASCII digits start a number.  isdigitrune() also accepts
		 * other Unicode digits, which strtoll() does not consume; that
		 * would leave n == 0 and the loop would never advance.  Such
		 * runes now fall through to the "unexpected" error below.
		 */
		if(r >= '0' && r <= '9'){
			num = strtoll(cp, &rest, 10);
			n = rest - cp;
			tok = newtok(tokens, TokNumber);
			tok->num = num;
			goto next;
		}

		if(isalpharune(r)){
			start = cp;
			do{
				cp += n;
				n = chartorune(&r, cp);
			}while(isalpharune(r) || isdigitrune(r));

			tok = newtok(tokens, TokName);
			size = cp - start;
			tok->name = malloc(size + 1);
			if(tok->name == nil)
				sysfatal("malloc: %r");
			memcpy(tok->name, start, size);
			tok->name[size] = 0;
			/* cp already sits on the first rune after the name */
			continue;
		}

		if(r == '\''){
			/* step over the opening quote */
			cp += n;
			n = chartorune(&r, cp);
			start = cp;
			while(!(r == '\'' || r == 0)){
				cp += n;
				n = chartorune(&r, cp);
			}
			if(r == 0)
				error(ESyntax, "unmatched '");

			tok = newtok(tokens, TokString);
			/* rune count of the contents, plus one for the terminator */
			size = utfnlen(start, cp - start) + 1;
			tok->string = malloc(sizeof(Rune) * size);
			if(tok->string == nil)
				sysfatal("malloc: %r");
			/* the size limit is what stops the copy at the closing quote */
			runesnprint(tok->string, size, "%s", start);
			/* n is the length of the closing quote; `next` skips it */
			goto next;
		}

		error(ESyntax, "unexpected: '%C'", r);
next:
		cp += n;
	}

	newtok(tokens, TokEnd);
	return tokens;
}

/*
 * Return a short printable description of token `t`, chosen by its tag.
 *
 * The result is a string constant: it stays valid for the life of the
 * program and must not be modified or freed.  (Returning a pointer to a
 * local buffer, as was done before, hands the caller dead stack memory.)
 */
char *
printtok(Token t)
{
	switch(t.tag){
	case TokNumber:		return "number";
	case TokName:		return "name";
	case TokString:		return "string";
	case TokLparen:		return "(";
	case TokRparen:		return ")";
	case TokLbrack:		return "[";
	case TokRbrack:		return "]";
	case TokLbrace:		return "{";
	case TokRbrace:		return "}";
	case TokNewline:	return "newline";
	case TokDiamond:	return "⋄";
	case TokPrimitive:	return "primitive";
	case TokDel:		return "∇";
	case TokLarrow:		return "←";
	case TokSemi:		return ";";
	case TokEnd:		return "end";
	default:		return "???";
	}
}