shithub: mc

Download patch

ref: 1692dfdd9688c970e0b5fb1def9758b229bc1402
parent: b92e97f95a5e15f3eb1d0776346378ff2cfd49fd
author: Ori Bernstein <[email protected]>
date: Mon Jul 30 21:42:43 EDT 2012

Add files that I accidentally removed.

--- /dev/null
+++ b/6/Makefile
@@ -1,0 +1,12 @@
+INSTBIN=6m
+BIN=6m
+OBJ=isel.o \
+    locs.o \
+    main.o \
+    ra.o \
+    simp.o \
+
+DEPS=../parse/libparse.a ../opt/libopt.a
+
+include ../mk/lexyacc.mk
+include ../mk/c.mk
--- /dev/null
+++ b/6/asm.h
@@ -1,0 +1,201 @@
+#define Maxarg 4        /* maximum number of args an insn can have */
+#define Ptrsz 8         /* the size of a machine word (ie, pointer size) */
+#define K 14            /* the number of allocatable registers */
+
+typedef size_t regid;
+
+typedef struct Insn Insn;
+typedef struct Loc Loc;
+typedef struct Func Func;
+typedef struct Blob Blob;
+typedef struct Isel Isel;
+typedef struct Asmbb Asmbb;
+
+typedef enum {
+#define Insn(val, fmt, use, def) val,
+#include "insns.def"
+#undef Insn
+} AsmOp;
+
+typedef enum {
+#define Reg(r, name, mode) r,
+#include "regs.def"
+#undef Reg
+    Nreg
+} Reg;
+
+typedef enum {
+    Locnone,
+    Loclbl,  /* label */
+    Locreg,  /* register */
+    Locmem,  /* reg offset mem */
+    Locmeml, /* label offset mem */
+    Loclit,  /* literal value */
+    Loclitl  /* label address */
+} Loctype;
+
+typedef enum {
+    ModeNone,
+    ModeB, /* byte */
+    ModeS, /* short */
+    ModeL, /* long */
+    ModeQ, /* quad */
+    ModeF, /* float32 */
+    ModeD, /* float64 */
+    Nmode,
+} Mode;
+
+struct Loc {
+    Loctype type;
+    Mode mode;
+    union {
+        char *lbl;
+        struct {
+            regid id;
+            Reg   colour;
+        } reg;
+        long  lit;
+        /* disp(base + index) */
+        struct {
+            /* only one of lbldisp and constdisp may be used */
+            char *lbldisp;
+            long constdisp;
+            int scale; /* 0,1,2,4, or 8 */
+            Loc *base; /* needed */
+            Loc *idx;  /* optional */
+        } mem;
+    };
+};
+
+struct Insn {
+    AsmOp op;
+    Loc *args[Maxarg];
+    size_t nargs;
+};
+
+struct Func {
+    char *name;
+    int   isexport;
+    size_t stksz;
+    Type *type;
+    Htab *locs;
+    Node *ret;
+    Cfg  *cfg;
+};
+
+struct Asmbb {
+    int id;
+    char **lbls;
+    size_t nlbls;
+    Insn **il;
+    size_t ni;
+
+    Bitset *pred;
+    Bitset *succ;
+    Bitset *use;
+    Bitset *def;
+    Bitset *livein;
+    Bitset *liveout;
+};
+
+
+/* instruction selection state */
+struct Isel {
+    Cfg  *cfg;          /* cfg built with nodes */
+
+    Asmbb **bb;         /* 1:1 mappings with the Node bbs in the CFG */
+    size_t nbb;
+    Asmbb *curbb;
+
+    Node *ret;          /* we store the return into here */
+    Htab *locs;         /* decl id => int stkoff */
+    Htab *globls;       /* decl id => char *globlname */
+
+    /* increased when we spill */
+    Loc *stksz;
+
+    /* register allocator state */
+    Bitset *prepainted; /* locations that need to be a specific colour */
+
+    size_t *gbits;      /* igraph matrix repr */
+    Bitset **gadj;      /* igraph adj set repr */
+    int *degree;        /* degree of nodes */
+    Loc **aliasmap;   /* mapping of aliases */
+
+    Loc  **selstk;
+    size_t nselstk;
+
+    Bitset *coalesced;
+    Bitset *spilled;
+
+    Insn ***rmoves;
+    size_t *nrmoves;
+
+    /* move sets */
+    Insn **mcoalesced;
+    size_t nmcoalesced;
+
+    Insn **mconstrained;
+    size_t nmconstrained;
+
+    Insn **mfrozen;
+    size_t nmfrozen;
+
+    Insn **mactive;
+    size_t nmactive;
+
+
+    /* worklists */
+    Insn **wlmove;
+    size_t nwlmove;
+
+    Loc **wlspill;
+    size_t nwlspill;
+
+    Loc **wlfreeze;
+    size_t nwlfreeze;
+
+    Loc **wlsimp;
+    size_t nwlsimp;
+};
+
+/* entry points */
+void genblob(FILE *fd, Node *blob, Htab *globls);
+void genasm(FILE *fd, Func *fn, Htab *globls);
+void gen(Node *file, char *out);
+
+/* location generation */
+extern size_t maxregid;
+extern Loc **locmap; /* mapping from reg id => Loc * */
+
+char *genlblstr(char *buf, size_t sz);
+Node *genlbl(void);
+Loc *loclbl(Node *lbl);
+Loc *locstrlbl(char *lbl);
+Loc *locreg(Mode m);
+Loc *locphysreg(Reg r);
+Loc *locmem(long disp, Loc *base, Loc *idx, Mode mode);
+Loc *locmeml(char *disp, Loc *base, Loc *idx, Mode mode);
+Loc *locmems(long disp, Loc *base, Loc *idx, int scale, Mode mode);
+Loc *locmemls(char *disp, Loc *base, Loc *idx, int scale, Mode mode);
+Loc *loclit(long val, Mode m);
+Loc *loclitl(char *lbl);
+Loc *coreg(Reg r, Mode m);
+
+void locprint(FILE *fd, Loc *l, char spec);
+void iprintf(FILE *fd, Insn *insn);
+
+/* register allocation */
+extern char *regnames[]; /* name table */
+extern Mode regmodes[];  /* mode table */
+
+void regalloc(Isel *s);
+
+
+/* useful functions */
+size_t tysize(Type *t);
+size_t size(Node *n);
+int stacktype(Type *t);
+int stacknode(Node *n);
+void breakhere();
+void dumpasm(Isel *s, FILE *fd);
--- /dev/null
+++ b/6/insns.def
@@ -1,0 +1,69 @@
+/* Table of instructions. Each instruction
+   is defined by the following macro:
+        Insn(enumval, fmt, attr)
+    The format string 'fmt' has the following expansions:
+        %r            - reg
+        %m            - mem
+        %i            - imm
+        %v            - reg/mem
+        %u            - reg/imm
+        %x            - reg/mem/imm
+        %[1-9]*t      - Mode of an operand. The optional number
+                        preceeding it is the operand desired for
+                        the mode.
+    Currently, there aren't any attrs, because none were needed yet.
+    Eventually, they'll probably include flag setting and so on.
+
+    For technical reasons, the indexing on use and def statments is 1-based,
+    instead of 0-based. (0 is the sentinel value).
+*/
+
+Insn(Inone,     "BAD_INSN",                     Use(), Def())
+/* Note, the mov instruction is specified in an overly general manner. */
+Insn(Imov,      "\tmov%t %x,%x\n",              Use(.l={1}),                    Def(.l={2}))
+Insn(Imovz,     "\tmovz%1t%2t %x,%x\n",         Use(.l={1}),                    Def(.l={2}))
+Insn(Imovs,     "\tmovs%1t%2t %x,%x\n",         Use(.l={1}),                    Def(.l={2}))
+Insn(Ilea,      "\tlea%2t %m,%r\n",             Use(.l={1}),                    Def(.l={2}))
+
+Insn(Iadd,      "\tadd%t %x,%r\n",              Use(.l={1,2}),                  Def(.l={2}))
+Insn(Isub,      "\tsub%t %x,%r\n",              Use(.l={1,2}),                  Def(.l={2}))
+Insn(Iimul,     "\timul%t %x,%r\n",             Use(.l={1,2}),                  Def(.l={2}))
+Insn(Imul,      "\tmul%t %r\n",                 Use(.l={1},.r={Reax}),          Def(.r={Reax,Redx}))
+Insn(Idiv,      "\tdiv%t %r\n",                 Use(.l={1},.r={Reax,Redx}),     Def(.r={Reax,Redx}))
+Insn(Ineg,      "\tneg%t %r\n",                 Use(.l={1}),                    Def(.l={1}))
+Insn(Iand,      "\tand%t %x,%r\n",              Use(.l={1,2}),                  Def(.l={2}))
+Insn(Ior,       "\tor%t  %x,%r\n",              Use(.l={1,2}),                  Def(.l={2}))
+Insn(Ixor,      "\txor%t %x,%r\n",              Use(.l={1,2}),                  Def(.l={2}))
+Insn(Inot,      "\tnot%t %v\n",                 Use(.l={1}),                    Def(.l={1}))
+Insn(Ishl,      "\tsal%2t %u,%r\n",             Use(.l={1,2}),                  Def(.l={2}))
+Insn(Isar,      "\tshr%2t %u,%r\n",             Use(.l={1,2}),                  Def(.l={2}))
+Insn(Ishr,      "\tshr%2t %u,%r\n",             Use(.l={1,2}),                  Def(.l={2}))
+
+Insn(Itest,     "\ttest%t %x,%r\n",             Use(.l={1,2}),                  Def(.l={2}))
+Insn(Icmp,      "\tcmp%t  %x,%r\n",             Use(.l={1,2}),                  Def(.l={2}))
+
+Insn(Ipush,     "\tpush%t %r\n",                Use(.l={1}),                    Def())
+Insn(Ipop,      "\tpop%t %r\n",                 Use(.l={1}),                    Def())
+
+/* branch instructions */
+Insn(Isetz,     "\tsetz  %v\n",                 Use(),  Def(.l={1}))
+Insn(Isetnz,    "\tsetnz %v\n",                 Use(),  Def(.l={1}))
+Insn(Isetl,     "\tsetl  %v\n",                 Use(),  Def(.l={1}))
+Insn(Isetle,    "\tsetle %v\n",                 Use(),  Def(.l={1}))
+Insn(Isetg,     "\tsetg %v\n",                  Use(),  Def(.l={1}))
+Insn(Isetge,    "\tsetge %v\n",                 Use(),  Def(.l={1}))
+
+/* branch instructions */
+Insn(Icall,     "\tcall %v\n",                  Use(.l={1}), Def())
+Insn(Icallind,  "\tcall *%v\n",                 Use(.l={1}), Def())
+Insn(Ijmp,      "\tjmp %v\n",                   Use(.l={1}), Def())
+Insn(Ijz,       "\tjz %v\n",                    Use(.l={1}), Def())
+Insn(Ijnz,      "\tjnz %v\n",                   Use(.l={1}), Def())
+Insn(Ijl,       "\tjl %v\n",                    Use(.l={1}), Def())
+Insn(Ijle,      "\tjle %v\n",                   Use(.l={1}), Def())
+Insn(Ijg,       "\tjg %v\n",                    Use(.l={1}), Def())
+Insn(Ijge,      "\tjge %v\n",                   Use(.l={1}), Def())
+Insn(Iret,      "\tret\n",                      Use(), Def())
+
+/* not really an insn... */
+Insn(Ilbl,      "%v:\n",                        Use(), Def())
--- /dev/null
+++ b/6/isel.c
@@ -1,0 +1,900 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "parse.h"
+#include "opt.h"
+#include "asm.h"
+
+#include "platform.h"
+
+/* string tables */
+char *insnfmts[] = {
+#define Insn(val, fmt, use, def) fmt,
+#include "insns.def"
+#undef Insn
+};
+
+char modenames[] = {
+  [ModeB] = 'b',
+  [ModeS] = 's',
+  [ModeL] = 'l',
+  [ModeQ] = 'q',
+  [ModeF] = 'f',
+  [ModeD] = 'd'
+};
+
+/* forward decls */
+Loc *selexpr(Isel *s, Node *n);
+
+/* used to decide which operator is appropriate
+ * for implementing various conditional operators */
+struct {
+    AsmOp test;
+    AsmOp jmp;
+    AsmOp getflag;
+} reloptab[Numops] = {
+    [Olnot] = {Itest, Ijz, Isetz},
+    [Oeq] = {Icmp, Ijz, Isetz},
+    [One] = {Icmp, Ijnz, Isetnz},
+    [Ogt] = {Icmp, Ijg, Isetg},
+    [Oge] = {Icmp, Ijge, Isetge},
+    [Olt] = {Icmp, Ijl, Isetl},
+    [Ole] = {Icmp, Ijle, Isetle}
+};
+
+static Mode mode(Node *n)
+{
+    Type *t;
+
+    t = tybase(exprtype(n));
+    switch (t->type) {
+        case Tyfloat32: return ModeF; break;
+        case Tyfloat64: return ModeD; break;
+        default:
+            if (stacktype(t))
+                return ModeNone;
+            switch (size(n)) {
+                case 1: return ModeB; break;
+                case 2: return ModeS; break;
+                case 4: return ModeL; break;
+                case 8: return ModeQ; break;
+            }
+            break;
+    }
+    /* FIXME: huh. what should the mode for, say, structs
+     * be when we have no intention of loading /through/ the
+     * pointer? */
+    return ModeNone;
+}
+
+static Loc *loc(Isel *s, Node *n)
+{
+    size_t stkoff;
+    Loc *l, *rip;
+    Node *v;
+
+    switch (exprop(n)) {
+        case Ovar:
+            if (hthas(s->locs, n)) {
+                stkoff = (size_t)htget(s->locs, n);
+                l = locmem(-stkoff, locphysreg(Rrbp), NULL, mode(n));
+            } else if (hthas(s->globls, n)) {
+                if (tybase(exprtype(n))->type == Tyfunc)
+                    rip = NULL;
+                else
+                    rip = locphysreg(Rrip);
+                l = locmeml(htget(s->globls, n), rip, NULL, mode(n));
+            } else {
+                die("%s (id=%ld) not found", namestr(n->expr.args[0]), n->expr.did);
+            }
+            break;
+        case Olit:
+            v = n->expr.args[0];
+            switch (v->lit.littype) {
+                case Lchr:      l = loclit(v->lit.chrval, mode(n)); break;
+                case Lbool:     l = loclit(v->lit.boolval, mode(n)); break;
+                case Lint:      l = loclit(v->lit.intval, mode(n)); break;
+                default:
+                                die("Literal type %s should be blob", litstr(v->lit.littype));
+            }
+            break;
+        default:
+            die("Node %s not leaf in loc()", opstr(exprop(n)));
+            break;
+    }
+    return l;
+}
+
+static Insn *mkinsnv(AsmOp op, va_list ap)
+{
+    Loc *l;
+    Insn *i;
+    int n;
+
+    n = 0;
+    i = malloc(sizeof(Insn));
+    i->op = op;
+    while ((l = va_arg(ap, Loc*)) != NULL)
+        i->args[n++] = l;
+    i->nargs = n;
+    return i;
+}
+
+static void g(Isel *s, AsmOp op, ...)
+{
+    va_list ap;
+    Insn *i;
+
+    va_start(ap, op);
+    i = mkinsnv(op, ap);
+    va_end(ap);
+    if (debugopt['i']) {
+        printf("GEN ");
+        iprintf(stdout, i);
+    }
+    lappend(&s->curbb->il, &s->curbb->ni, i);
+}
+
+static void movz(Isel *s, Loc *src, Loc *dst)
+{
+    if (src->mode == dst->mode)
+        g(s, Imov, src, dst, NULL);
+    else
+        g(s, Imovz, src, dst, NULL);
+}
+
+
+static void load(Isel *s, Loc *a, Loc *b)
+{
+    Loc *l;
+
+    assert(b->type == Locreg);
+    if (a->type == Locreg)
+        l = locmem(0, b, Rnone, a->mode);
+    else
+        l = a;
+    g(s, Imov, l, b, NULL);
+}
+
+static void stor(Isel *s, Loc *a, Loc *b)
+{
+    Loc *l;
+
+    assert(a->type == Locreg || a->type == Loclit);
+    if (b->type == Locreg)
+        l = locmem(0, b, Rnone, b->mode);
+    else
+        l = b;
+    g(s, Imov, a, l, NULL);
+}
+
+/* ensures that a location is within a reg */
+static Loc *inr(Isel *s, Loc *a)
+{
+    Loc *r;
+
+    if (a->type == Locreg)
+        return a;
+    r = locreg(a->mode);
+    load(s, a, r);
+    return r;
+}
+
+/* ensures that a location is within a reg or an imm */
+static Loc *inri(Isel *s, Loc *a)
+{
+    if (a->type == Locreg || a->type == Loclit)
+        return a;
+    else
+        return inr(s, a);
+}
+
+/* ensures that a location is within a reg or an imm */
+static Loc *inrm(Isel *s, Loc *a)
+{
+    if (a->type == Locreg || a->type == Locmem)
+        return a;
+    else
+        return inr(s, a);
+}
+
+/* If we're testing equality, etc, it's a bit silly
+ * to generate the test, store it to a bite, expand it
+ * to the right width, and then test it again. Try to optimize
+ * for these common cases.
+ *
+ * if we're doing the optimization to avoid
+ * multiple tests, we want to eval the children
+ * of the first arg, instead of the first arg
+ * directly */
+static void selcjmp(Isel *s, Node *n, Node **args)
+{
+    Loc *a, *b;
+    Loc *l1, *l2;
+    AsmOp cond, jmp;
+
+    cond = reloptab[exprop(args[0])].test;
+    jmp = reloptab[exprop(args[0])].jmp;
+    /* if we have a cond, we're knocking off the redundant test,
+     * and want to eval the children */
+    if (cond) {
+        a = selexpr(s, args[0]->expr.args[0]);
+        if (args[0]->expr.nargs == 2)
+            b = selexpr(s, args[0]->expr.args[1]);
+        else
+            b = a;
+        a = inr(s, a);
+    } else {
+        cond = Itest;
+        jmp = Ijnz;
+        b = inr(s, selexpr(s, args[0])); /* cond */
+        a = b;
+    }
+
+    /* the jump targets will always be evaluated the same way */
+    l1 = loclbl(args[1]); /* if true */
+    l2 = loclbl(args[2]); /* if false */
+
+    g(s, cond, b, a, NULL);
+    g(s, jmp, l1, NULL);
+    g(s, Ijmp, l2, NULL);
+}
+
+static Loc *binop(Isel *s, AsmOp op, Node *x, Node *y)
+{
+    Loc *a, *b;
+
+    a = selexpr(s, x);
+    b = selexpr(s, y);
+    a = inr(s, a);
+    g(s, op, b, a, NULL);
+    return a;
+}
+
+/* We have a few common cases to optimize here:
+ *    Oaddr(expr)
+ * or:
+ *    Oadd(
+ *        reg,
+ *        reg||const)
+ *
+ * or:
+ *    Oadd(
+ *        reg,
+ *        Omul(reg,
+ *             2 || 4 || 8)))
+ */
+static int ismergablemul(Node *n, int *r)
+{
+    int v;
+
+    if (exprop(n) != Omul)
+        return 0;
+    if (exprop(n->expr.args[1]) != Olit)
+        return 0;
+    if (n->expr.args[1]->expr.args[0]->type != Nlit)
+        return 0;
+    if (n->expr.args[1]->expr.args[0]->lit.littype != Lint)
+        return 0;
+    v = n->expr.args[1]->expr.args[0]->lit.intval;
+    if (v != 2 && v != 4 && v != 8)
+        return 0;
+    *r = v;
+    return 1;
+}
+
+static Loc *memloc(Isel *s, Node *e, Mode m)
+{
+    Node **args;
+    Loc *l, *b, *o; /* location, base, offset */
+    int scale;
+
+    scale = 1;
+    l = NULL;
+    args = e->expr.args;
+    if (exprop(e) == Oadd) {
+        b = selexpr(s, args[0]);
+        if (ismergablemul(args[1], &scale))
+            o = selexpr(s, args[1]->expr.args[0]);
+        else
+            o = selexpr(s, args[1]);
+
+        if (b->type != Locreg)
+            b = inr(s, b);
+        if (o->type == Loclit) {
+            l = locmem(scale*o->lit, b, Rnone, m);
+        } else {
+            b = inr(s, b);
+            o = inr(s, o);
+            l = locmems(0, b, o, scale, m);
+        }
+    } else {
+        l = selexpr(s, e);
+        l = inr(s, l);
+        l = locmem(0, l, Rnone, m);
+    }
+    assert(l != NULL);
+    return l;
+}
+
+static void blit(Isel *s, Loc *to, Loc *from, size_t dstoff, size_t srcoff, size_t sz)
+{
+    size_t i;
+    Loc *sp, *dp; /* pointers to src, dst */
+    Loc *tmp, *src, *dst; /* source memory, dst memory */
+
+    sp = inr(s, from);
+    dp = inr(s, to);
+
+    /* Slightly funny loop condition: We might have trailing bytes
+     * that we can't blit word-wise. */
+    tmp = locreg(ModeL);
+    for (i = 0; i < sz/4; i++) {
+        src = locmem(i*4 + srcoff, sp, NULL, ModeL);
+        dst = locmem(i*4 + dstoff, dp, NULL, ModeL);
+        g(s, Imov, src, tmp, NULL);
+        g(s, Imov, tmp, dst, NULL);
+    }
+    /* now, the trailing bytes */
+    tmp = locreg(ModeB);
+    for (; i < sz%4; i++) {
+        src = locmem(i, sp, NULL, ModeB);
+        dst = locmem(i, dp, NULL, ModeB);
+        g(s, Imov, src, tmp, NULL);
+        g(s, Imov, tmp, dst, NULL);
+    }
+}
+
+static Loc *gencall(Isel *s, Node *n)
+{
+    Loc *src, *dst, *arg, *fn;   /* values we reduced */
+    Loc *rax, *rsp;       /* hard-coded registers */
+    Loc *stkbump;        /* calculated stack offset */
+    int argsz, argoff;
+    size_t i;
+
+    rsp = locphysreg(Rrsp);
+    if (tybase(exprtype(n))->type == Tyvoid)
+        rax = NULL;
+    else if (stacktype(exprtype(n)))
+        rax = locphysreg(Rrax);
+    else
+        rax = coreg(Rrax, mode(n));
+    argsz = 0;
+    /* Have to calculate the amount to bump the stack
+     * pointer by in one pass first, otherwise if we push
+     * one at a time, we evaluate the args in reverse order.
+     * Not good.
+     *
+     * We skip the first operand, since it's the function itself */
+    for (i = 1; i < n->expr.nargs; i++)
+        argsz += size(n->expr.args[i]);
+    stkbump = loclit(argsz, ModeQ);
+    if (argsz)
+        g(s, Isub, stkbump, rsp, NULL);
+
+    /* Now, we can evaluate the arguments */
+    argoff = 0;
+    for (i = 1; i < n->expr.nargs; i++) {
+        arg = selexpr(s, n->expr.args[i]);
+        if (stacknode(n->expr.args[i])) {
+            dst = locreg(ModeQ);
+            src = locreg(ModeQ);
+            g(s, Ilea, arg, src, NULL);
+            blit(s, rsp, src, argoff, 0, size(n->expr.args[i]));
+        } else {
+            dst = locmem(argoff, rsp, NULL, arg->mode);
+            arg = inri(s, arg);
+            stor(s, arg, dst);
+        }
+        argoff += size(n->expr.args[i]);
+    }
+    fn = selexpr(s, n->expr.args[0]);
+    if (fn->type == Loclbl || fn->type == Locmeml)
+        g(s, Icall, fn, NULL);
+    else
+        g(s, Icallind, fn, NULL);
+    if (argsz)
+        g(s, Iadd, stkbump, rsp, NULL);
+    return rax;
+}
+
+Loc *selexpr(Isel *s, Node *n)
+{
+    Loc *a, *b, *c, *d, *r;
+    Loc *eax, *edx, *cl; /* x86 wants some hard-coded regs */
+    Node **args;
+
+    args = n->expr.args;
+    eax = locphysreg(Reax);
+    edx = locphysreg(Redx);
+    cl = locphysreg(Rcl);
+    r = NULL;
+    switch (exprop(n)) {
+        case Oadd:      r = binop(s, Iadd, args[0], args[1]); break;
+        case Osub:      r = binop(s, Isub, args[0], args[1]); break;
+        case Obor:      r = binop(s, Ior,  args[0], args[1]); break;
+        case Oband:     r = binop(s, Iand, args[0], args[1]); break;
+        case Obxor:     r = binop(s, Ixor, args[0], args[1]); break;
+        case Omul:      r = binop(s, Iimul, args[0], args[1]); break;
+        case Odiv:
+        case Omod:
+            /* these get clobbered by the div insn */
+            a = selexpr(s, args[0]);
+            b = selexpr(s, args[1]);
+            b = inr(s, b);
+            c = coreg(Reax, mode(n));
+            r = locreg(a->mode);
+            if (r->mode == ModeB)
+                g(s, Ixor, eax, eax, NULL);
+            g(s, Imov, a, c, NULL);
+            g(s, Ixor, edx, edx, NULL);
+            g(s, Idiv, b, NULL);
+            if (exprop(n) == Odiv)
+                d = coreg(Reax, mode(n));
+            else if (r->mode != ModeB)
+                d = coreg(Redx, mode(n));
+            else
+                d = locphysreg(Rah);
+            g(s, Imov, d, r, NULL);
+            break;
+        case Oneg:
+            r = selexpr(s, args[0]);
+            r = inr(s, r);
+            g(s, Ineg, r, NULL);
+            break;
+
+        case Obsl:
+        case Obsr:
+            a = inr(s, selexpr(s, args[0]));
+            b = selexpr(s, args[1]);
+            if (b->type == Loclit) {
+                d = b;
+            } else {
+                c = coreg(Rcl, b->mode);
+                g(s, Imov, b, c, NULL);
+                d = cl;
+            }
+            if (exprop(n) == Obsr) {
+                if (istysigned(n->expr.type))
+                    g(s, Isar, d, a, NULL);
+                else
+                    g(s, Ishr, d, a, NULL);
+            } else {
+                g(s, Ishl, d, a, NULL);
+            }
+            r = a;
+            break;
+        case Obnot:
+            r = selexpr(s, args[0]);
+            r = inrm(s, r);
+            g(s, Inot, r, NULL);
+            break;
+
+        case Oderef:
+            a = selexpr(s, args[0]);
+            a = inr(s, a);
+            r = locreg(mode(n));
+            c = locmem(0, a, Rnone, mode(n));
+            g(s, Imov, c, r, NULL);
+            break;
+
+        case Oaddr:
+            a = selexpr(s, args[0]);
+            if (a->type == Loclbl || (a->type == Locmeml && !a->mem.base)) {
+                r = loclitl(a->lbl);
+            } else {
+                r = locreg(ModeQ);
+                g(s, Ilea, a, r, NULL);
+            }
+            break;
+
+        case Olnot:
+            a = selexpr(s, args[0]);
+            b = locreg(ModeB);
+            r = locreg(mode(n));
+            g(s, reloptab[exprop(n)].test, a, a, NULL);
+            g(s, reloptab[exprop(n)].getflag, b, NULL);
+            movz(s, b, r);
+            break;
+
+        case Oeq: case One: case Ogt: case Oge: case Olt: case Ole:
+            a = selexpr(s, args[0]);
+            b = selexpr(s, args[1]);
+            a = inr(s, a);
+            c = locreg(ModeB);
+            r = locreg(mode(n));
+            g(s, reloptab[exprop(n)].test, b, a, NULL);
+            g(s, reloptab[exprop(n)].getflag, c, NULL);
+            movz(s, c, r);
+            return r;
+
+        case Oasn:  /* relabel */
+            die("Unimplemented op %s", opstr(exprop(n)));
+            break;
+        case Oset:
+            assert(exprop(args[0]) == Ovar);
+            b = selexpr(s, args[1]);
+            a = selexpr(s, args[0]);
+            b = inri(s, b);
+            g(s, Imov, b, a, NULL);
+            r = b;
+            break;
+        case Ostor: /* reg -> mem */
+            b = selexpr(s, args[1]);
+            a = memloc(s, args[0], mode(args[0]));
+            b = inri(s, b);
+            g(s, Imov, b, a, NULL);
+            r = b;
+            break;
+        case Oload: /* mem -> reg */
+            a = memloc(s, args[0], mode(n));
+            r = locreg(mode(n));
+            /* FIXME: we should be moving the correct 'coreg' */
+            g(s, Imov, a, r, NULL);
+            break;
+        case Ocall:
+            r = gencall(s, n);
+            break;
+        case Ojmp:
+            g(s, Ijmp, a = loclbl(args[0]), NULL);
+            break;
+        case Ocjmp:
+            selcjmp(s, n, args);
+            break;
+
+        case Olit: /* fall through */
+            r = loc(s, n);
+            break;
+        case Ovar:
+            r = loc(s, n);
+            break;
+        case Olbl:
+            r = loclbl(args[0]);
+            break;
+        case Oblit:
+            a = selexpr(s, args[0]);
+            b = selexpr(s, args[1]);
+            blit(s, a, b, 0, 0, args[2]->expr.args[0]->lit.intval);
+            r = b;
+            break;
+        case Otrunc:
+            r = selexpr(s, args[0]);
+            r->mode = mode(n);
+            break;
+        case Ozwiden:
+            a = selexpr(s, args[0]);
+            b = locreg(mode(n));
+            movz(s, a, b);
+            r = b;
+            break;
+        case Oswiden:
+            a = selexpr(s, args[0]);
+            b = locreg(mode(n));
+            g(s, Imovs, a, b, NULL);
+            r = b;
+            break;
+
+        /* These operators should never show up in the reduced trees,
+         * since they should have been replaced with more primitive
+         * expressions by now */
+        case Obad: case Oret: case Opreinc: case Opostinc: case Opredec:
+        case Opostdec: case Olor: case Oland: case Oaddeq:
+        case Osubeq: case Omuleq: case Odiveq: case Omodeq: case Oboreq:
+        case Obandeq: case Obxoreq: case Obsleq: case Obsreq: case Omemb:
+        case Oslice: case Oidx: case Osize: case Numops:
+        case Ocons: case Otup: case Oarr:
+        case Oslbase: case Osllen: case Ocast:
+            dump(n, stdout);
+            die("Should not see %s in isel", opstr(exprop(n)));
+            break;
+    }
+    return r;
+}
+
+void locprint(FILE *fd, Loc *l, char spec)
+{
+    switch (l->type) {
+        case Loclitl:
+            assert(spec == 'i' || spec == 'x' || spec == 'u');
+            fprintf(fd, "$%s", l->lbl);
+            break;
+        case Loclbl:
+            assert(spec == 'm' || spec == 'v' || spec == 'x');
+            fprintf(fd, "%s", l->lbl);
+            break;
+        case Locreg:
+            assert(spec == 'r' || spec == 'v' || spec == 'x' || spec == 'u');
+            if (l->reg.colour == Rnone)
+                fprintf(fd, "%%P.%zd", l->reg.id);
+            else
+                fprintf(fd, "%s", regnames[l->reg.colour]);
+            break;
+        case Locmem:
+        case Locmeml:
+            assert(spec == 'm' || spec == 'v' || spec == 'x');
+            if (l->type == Locmem) {
+                if (l->mem.constdisp)
+                    fprintf(fd, "%ld", l->mem.constdisp);
+            } else {
+                if (l->mem.lbldisp)
+                    fprintf(fd, "%s", l->mem.lbldisp);
+            }
+            if (l->mem.base) {
+                fprintf(fd, "(");
+                locprint(fd, l->mem.base, 'r');
+                if (l->mem.idx) {
+                    fprintf(fd, ",");
+                    locprint(fd, l->mem.idx, 'r');
+                }
+                if (l->mem.scale > 1)
+                    fprintf(fd, ",%d", l->mem.scale);
+                if (l->mem.base)
+                    fprintf(fd, ")");
+            } else if (l->type != Locmeml) {
+                die("Only locmeml can have unspecified base reg");
+            }
+            break;
+        case Loclit:
+            assert(spec == 'i' || spec == 'x' || spec == 'u');
+            fprintf(fd, "$%ld", l->lit);
+            break;
+        case Locnone:
+            die("Bad location in locprint()");
+            break;
+    }
+}
+
+void iprintf(FILE *fd, Insn *insn)
+{
+    char *p;
+    int i;
+    int modeidx;
+
+    /* x64 has a quirk; it has no movzlq because mov zero extends. This
+     * means that we need to do a movl when we really want a movzlq. Since
+     * we don't know the name of the reg to use, we need to sub it in when
+     * writing... */
+    if (insn->op == Imovz) {
+        if (insn->args[0]->mode == ModeL && insn->args[1]->mode == ModeQ) {
+            if (insn->args[1]->reg.colour) {
+                insn->op = Imov;
+                insn->args[1] = coreg(insn->args[1]->reg.colour, ModeL);
+            }
+        }
+    }
+    p = insnfmts[insn->op];
+    i = 0;
+    modeidx = 0;
+    for (; *p; p++) {
+        if (*p !=  '%') {
+            fputc(*p, fd);
+            continue;
+        }
+
+        /* %-formating */
+        p++;
+        switch (*p) {
+            case '\0':
+                goto done; /* skip the final p++ */
+            case 'r': /* register */
+            case 'm': /* memory */
+            case 'i': /* imm */
+            case 'v': /* reg/mem */
+            case 'u': /* reg/imm */
+            case 'x': /* reg/mem/imm */
+                locprint(fd, insn->args[i], *p);
+                i++;
+                break;
+            case 't':
+            default:
+                /* the  asm description uses 1-based indexing, so that 0
+                 * can be used as a sentinel. */
+                if (isdigit(*p))
+                    modeidx = strtol(p, &p, 10) - 1;
+
+                if (*p == 't')
+                    fputc(modenames[insn->args[modeidx]->mode], fd);
+                else
+                    die("Invalid %%-specifier '%c'", *p);
+                break;
+        }
+    }
+done:
+    return;
+}
+
+static void isel(Isel *s, Node *n)
+{
+    Loc *lbl;
+
+    switch (n->type) {
+        case Nlbl:
+            g(s, Ilbl, lbl = loclbl(n), NULL);
+            break;
+        case Nexpr:
+            selexpr(s, n);
+            break;
+        case Ndecl:
+            break;
+        default:
+            die("Bad node type in isel()");
+            break;
+    }
+}
+
+static void prologue(Isel *s, size_t sz)
+{
+    Loc *rsp;
+    Loc *rbp;
+    Loc *stksz;
+
+    rsp = locphysreg(Rrsp);
+    rbp = locphysreg(Rrbp);
+    stksz = loclit(sz, ModeQ);
+    g(s, Ipush, rbp, NULL);
+    g(s, Imov, rsp, rbp, NULL);
+    g(s, Isub, stksz, rsp, NULL);
+    s->stksz = stksz; /* need to update if we spill */
+}
+
+static void epilogue(Isel *s)
+{
+    Loc *rsp, *rbp;
+    Loc *ret;
+
+    rsp = locphysreg(Rrsp);
+    rbp = locphysreg(Rrbp);
+    if (s->ret) {
+        ret = loc(s, s->ret);
+        g(s, Imov, ret, coreg(Rax, ret->mode), NULL);
+    }
+    g(s, Imov, rbp, rsp, NULL);
+    g(s, Ipop, rbp, NULL);
+    g(s, Iret, NULL);
+}
+
+static void writeasm(FILE *fd, Isel *s, Func *fn)
+{
+    size_t i, j;
+
+    if (fn->isexport || !strcmp(fn->name, Fprefix "main"))
+        fprintf(fd, ".globl %s\n", fn->name);
+    fprintf(fd, "%s:\n", fn->name);
+    for (j = 0; j < s->cfg->nbb; j++) {
+        for (i = 0; i < s->bb[j]->nlbls; i++)
+            fprintf(fd, "%s:\n", s->bb[j]->lbls[i]);
+        for (i = 0; i < s->bb[j]->ni; i++)
+            iprintf(fd, s->bb[j]->il[i]);
+    }
+}
+
+static Asmbb *mkasmbb(Bb *bb)
+{
+    Asmbb *as;
+
+    as = zalloc(sizeof(Asmbb));
+    as->id = bb->id;
+    as->pred = bsdup(bb->pred);
+    as->succ = bsdup(bb->succ);
+    as->lbls = memdup(bb->lbls, bb->nlbls*sizeof(char*));
+    as->nlbls = bb->nlbls;
+    return as;
+}
+
+static void writeblob(FILE *fd, char *p, size_t sz)
+{
+    size_t i;
+
+    for (i = 0; i < sz; i++) {
+        if (i % 60 == 0)
+            fprintf(fd, "\t.ascii \"");
+        if (isprint(p[i]))
+            fprintf(fd, "%c", p[i]);
+        else
+            fprintf(fd, "\\x%x", p[i] & 0xff);
+        /* line wrapping for readability */
+        if (i % 60 == 59 || i == sz - 1)
+            fprintf(fd, "\"\n");
+    }
+}
+
+static void writelit(FILE *fd, Node *v, size_t sz)
+{
+    char lbl[128];
+    size_t i;
+    char *intsz[] = {
+        [1] = ".byte",
+        [2] = ".short",
+        [4] = ".long",
+        [8] = ".quad"
+    };
+
+    assert(v->type == Nlit);
+    switch (v->lit.littype) {
+        case Lbool:     fprintf(fd, "\t.byte %d\n", v->lit.boolval);     break;
+        case Lchr:      fprintf(fd, "\t.long %d\n",  v->lit.chrval);     break;
+        case Lint:      fprintf(fd, "\t%s %lld\n", intsz[sz], v->lit.intval);    break;
+        case Lflt:      fprintf(fd, "\t.double %f\n", v->lit.fltval);    break;
+        case Lstr:      fprintf(fd, "\t.quad %s\n", genlblstr(lbl, 128));
+                        fprintf(fd, "\t.quad %zd\n", strlen(v->lit.strval));
+                        fprintf(fd, "%s:\n", lbl);
+                        writeblob(fd, v->lit.strval, strlen(v->lit.strval));
+                        break;
+        case Lseq:
+            for (i = 0; i < v->lit.nelt; i++)
+                writelit(fd, v->lit.seqval[i]->expr.args[0], size(v->lit.seqval[i]));
+            break;
+        case Lfunc:
+                        die("Generating this shit ain't ready yet ");
+    }
+}
+
+void genblob(FILE *fd, Node *blob, Htab *globls)
+{
+    size_t i;
+    char *lbl;
+
+    /* lits and such also get wrapped in decls */
+    assert(blob->type == Ndecl);
+
+    lbl = htget(globls, blob);
+    if (blob->decl.isexport)
+        fprintf(fd, ".globl %s\n", lbl);
+    fprintf(fd, "%s:\n", lbl);
+    if (blob->decl.init) {
+        if (exprop(blob->decl.init) != Olit)
+            die("Nonliteral initializer for global");
+        writelit(fd, blob->decl.init->expr.args[0], size(blob));
+    } else {
+        for (i = 0; i < size(blob); i++)
+            fprintf(fd, "\t.byte 0\n");
+    }
+}
+
+/* genasm requires all nodes in 'nl' to map cleanly to operations that are
+ * natively supported, as promised in the output of reduce().  No 64-bit
+ * operations on x32, no structures, and so on. */
+void genasm(FILE *fd, Func *fn, Htab *globls)
+{
+    Isel is = {0,};
+    size_t i, j;
+    char buf[128];
+
+    is.locs = fn->locs;
+    is.globls = globls;
+    is.ret = fn->ret;
+    is.cfg = fn->cfg;
+
+    for (i = 0; i < fn->cfg->nbb; i++)
+        lappend(&is.bb, &is.nbb, mkasmbb(fn->cfg->bb[i]));
+
+    is.curbb = is.bb[0];
+    prologue(&is, fn->stksz);
+    for (j = 0; j < fn->cfg->nbb - 1; j++) {
+        is.curbb = is.bb[j];
+        for (i = 0; i < fn->cfg->bb[j]->nnl; i++) {
+            /* put in a comment that says where this line comes from */
+            snprintf(buf, sizeof buf, "\n\t# bb = %zd, bbidx = %zd, line=%d",
+                     j, i, fn->cfg->bb[j]->nl[i]->line);
+            g(&is, Ilbl, locstrlbl(buf), NULL);
+            isel(&is, fn->cfg->bb[j]->nl[i]);
+        }
+    }
+    is.curbb = is.bb[is.nbb - 1];
+    epilogue(&is);
+    regalloc(&is);
+
+    if (debug)
+        writeasm(stdout, &is, fn);
+    writeasm(fd, &is, fn);
+}
--- /dev/null
+++ b/6/locs.c
@@ -1,0 +1,256 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "parse.h"
+#include "opt.h"
+#include "asm.h"
+
+Mode regmodes[] = {
+#define Reg(r, name, mode) mode,
+#include "regs.def"
+#undef Reg
+};
+
+char *regnames[] = {
+#define Reg(r, name, mode) name,
+#include "regs.def"
+#undef Reg
+};
+
+
+const Reg reginterferes[Nreg][Nmode + 1] = {
+    /* byte */
+    [Ral] = {Ral, Rax, Reax},
+    [Rcl] = {Rcl, Rcx, Recx},
+    [Rdl] = {Rdl, Rdx, Redx},
+    [Rbl] = {Rbl, Rbx, Rebx},
+
+    /* word */
+    [Rax] = {Ral, Rax, Reax},
+    [Rcx] = {Rcl, Rcx, Recx},
+    [Rdx] = {Rdl, Rdx, Redx},
+    [Rbx] = {Rbl, Rbx, Rebx},
+    [Rsi] = {Rsi, Resi},
+    [Rdi] = {Rdi, Redi},
+
+    /* dword */
+    [Reax] = {Ral, Rax, Reax},
+    [Recx] = {Rcl, Rcx, Recx},
+    [Redx] = {Rdl, Rdx, Redx},
+    [Rebx] = {Rbl, Rbx, Rebx},
+    [Resi] = {Rsi, Resi},
+    [Redi] = {Rdi, Redi},
+    [Resp] = {Resp},
+    [Rebp] = {Rebp},
+};
+
+char *genlblstr(char *buf, size_t sz)
+{
+    static int nextlbl;
+    snprintf(buf, 128, ".L%d", nextlbl++);
+    return buf;
+}
+
+Node *genlbl(void)
+{
+    char buf[128];
+
+    genlblstr(buf, 128);
+    return mklbl(-1, buf);
+}
+
+Loc *locstrlbl(char *lbl)
+{
+    Loc *l;
+
+    l = zalloc(sizeof(Loc));
+    l->type = Loclbl;
+    l->mode = ModeQ;
+    l->lbl = strdup(lbl);
+    return l;
+}
+
+Loc *loclitl(char *lbl)
+{
+    Loc *l;
+
+    l = zalloc(sizeof(Loc));
+    l->type = Loclitl;
+    l->mode = ModeQ;
+    l->lbl = strdup(lbl);
+    return l;
+}
+
+Loc *loclbl(Node *lbl)
+{
+    assert(lbl->type = Nlbl);
+    return locstrlbl(lbl->lbl.name);
+}
+
+Loc **locmap = NULL;
+size_t maxregid = 0;
+
+static Loc *locregid(regid id, Mode m)
+{
+    Loc *l;
+
+    l = zalloc(sizeof(Loc));
+    l->type = Locreg;
+    l->mode = m;
+    l->reg.id = id;
+    locmap = xrealloc(locmap, maxregid * sizeof(Loc*));
+    locmap[l->reg.id] = l;
+    return l;
+}
+
+Loc *locreg(Mode m)
+{
+    return locregid(maxregid++, m);
+}
+
+Loc *locphysreg(Reg r)
+{
+    static Loc *physregs[Nreg] = {0,};
+
+    if (physregs[r])
+        return physregs[r];
+    physregs[r] = locreg(regmodes[r]);
+    physregs[r]->reg.colour = r;
+    return physregs[r];
+}
+
+Loc *locmem(long disp, Loc *base, Loc *idx, Mode mode)
+{
+    Loc *l;
+
+    l = zalloc(sizeof(Loc));
+    l->type = Locmem;
+    l->mode = mode;
+    l->mem.constdisp = disp;
+    l->mem.base = base;
+    l->mem.idx = idx;
+    l->mem.scale = 0;
+    return l;
+}
+
+Loc *locmems(long disp, Loc *base, Loc *idx, int scale, Mode mode)
+{
+    Loc *l;
+
+    l = locmem(disp, base, idx, mode);
+    l->mem.scale = scale;
+    return l;
+}
+
+Loc *locmeml(char *disp, Loc *base, Loc *idx, Mode mode)
+{
+    Loc *l;
+
+    l = zalloc(sizeof(Loc));
+    l->type = Locmeml;
+    l->mode = mode;
+    l->mem.lbldisp = strdup(disp);
+    l->mem.base = base;
+    l->mem.idx = idx;
+    l->mem.scale = 0;
+    return l;
+}
+
+Loc *locmemls(char *disp, Loc *base, Loc *idx, int scale, Mode mode)
+{
+    Loc *l;
+
+    l = locmeml(disp, base, idx, mode);
+    l->mem.scale = scale;
+    return l;
+}
+
+
+Loc *loclit(long val, Mode m)
+{
+    Loc *l;
+
+    l = zalloc(sizeof(Loc));
+    l->type = Loclit;
+    l->mode = m;
+    l->lit = val;
+    return l;
+}
+
+Loc *coreg(Reg r, Mode m)
+{
+    Reg crtab[][Nmode + 1] = {
+        [Ral]  = {Rnone, Ral, Rax, Reax, Rrax},
+        [Rcl]  = {Rnone, Rcl, Rcx, Recx, Rrcx},
+        [Rdl]  = {Rnone, Rdl, Rdx, Redx, Rrdx},
+        [Rbl]  = {Rnone, Rbl, Rbx, Rebx, Rrbx},
+        [Rsil] = {Rnone, Rsil, Rsi, Resi, Rrsi},
+        [Rdil] = {Rnone, Rdil, Rdi, Redi, Rrdi},
+        [R8b]  = {Rnone, R8b, R8w, R8d, R8},
+        [R9b]  = {Rnone, R9b, R9w, R9d, R9},
+        [R10b] = {Rnone, R10b, R10w, R10d, R10},
+        [R11b] = {Rnone, R11b, R11w, R11d, R11},
+        [R12b] = {Rnone, R12b, R12w, R12d, R12},
+        [R13b] = {Rnone, R13b, R13w, R13d, R13},
+        [R14b] = {Rnone, R14b, R14w, R14d, R14},
+        [R15b] = {Rnone, R15b, R15w, R15d, R15},
+
+        [Rax]  = {Rnone, Ral,  Rax, Reax, Rrax},
+        [Rcx]  = {Rnone, Rcl,  Rcx, Recx, Rrcx},
+        [Rdx]  = {Rnone, Rdl,  Rdx, Redx, Rrdx},
+        [Rbx]  = {Rnone, Rbl,  Rbx, Rebx, Rrbx},
+        [Rsi]  = {Rnone, Rsil, Rsi, Resi, Rrsi},
+        [Rdi]  = {Rnone, Rsil, Rdi, Redi, Rrdi},
+        [R8w]  = {Rnone, R8b, R8w, R8d, R8},
+        [R9w]  = {Rnone, R9b, R9w, R9d, R9},
+        [R10w] = {Rnone, R10b, R10w, R10d, R10},
+        [R11w] = {Rnone, R11b, R11w, R11d, R11},
+        [R12w] = {Rnone, R12b, R12w, R12d, R12},
+        [R13w] = {Rnone, R13b, R13w, R13d, R13},
+        [R14w] = {Rnone, R14b, R14w, R14d, R14},
+        [R15w] = {Rnone, R15b, R15w, R15d, R15},
+
+        [Reax] = {Rnone, Ral, Rax, Reax},
+        [Recx] = {Rnone, Rcl, Rcx, Recx},
+        [Redx] = {Rnone, Rdl, Rdx, Redx},
+        [Rebx] = {Rnone, Rbl, Rbx, Rebx},
+        [Resi] = {Rnone, Rsil, Rsi, Resi},
+        [Redi] = {Rnone, Rsil, Rdi, Redi},
+        [R8d]  = {Rnone, R8b, R8w, R8d, R8},
+        [R9d]  = {Rnone, R9b, R9w, R9d, R9},
+        [R10d] = {Rnone, R10b, R10w, R10d, R10},
+        [R11d] = {Rnone, R11b, R11w, R11d, R11},
+        [R12d] = {Rnone, R12b, R12w, R12d, R12},
+        [R13d] = {Rnone, R13b, R13w, R13d, R13},
+        [R14d] = {Rnone, R14b, R14w, R14d, R14},
+        [R15d] = {Rnone, R15b, R15w, R15d, R15},
+
+        [Rrax] = {Rnone, Ral,  Rax, Reax, Rrax},
+        [Rrcx] = {Rnone, Rcl,  Rcx, Recx, Rrcx},
+        [Rrdx] = {Rnone, Rdl,  Rdx, Redx, Rrdx},
+        [Rrbx] = {Rnone, Rbl,  Rbx, Rebx, Rrbx},
+        [Rrsi] = {Rnone, Rsil, Rsi, Resi, Rrsi},
+        [Rrdi] = {Rnone, Rsil, Rdi, Redi, Rrdi},
+        [R8]   = {Rnone, R8b, R8w, R8d, R8},
+        [R9]   = {Rnone, R9b, R9w, R9d, R9},
+        [R10]  = {Rnone, R10b, R10w, R10d, R10},
+        [R11]  = {Rnone, R11b, R11w, R11d, R11},
+        [R12]  = {Rnone, R12b, R12w, R12d, R12},
+        [R13]  = {Rnone, R13b, R13w, R13d, R13},
+        [R14]  = {Rnone, R14b, R14w, R14d, R14},
+        [R15]  = {Rnone, R15b, R15w, R15d, R15},
+    };
+
+    assert(crtab[r][m] != Rnone);
+    return locphysreg(crtab[r][m]);
+}
+
--- /dev/null
+++ b/6/main.c
@@ -1,0 +1,108 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "parse.h"
+#include "opt.h"
+#include "asm.h"
+#include "platform.h"
+
+/* FIXME: move into one place...? */
+Node *file;
+int debug;
+char debugopt[128];
+char *outfile;
+char **incpaths;
+size_t nincpaths;
+
+static void usage(char *prog)
+{
+    printf("%s [-h] [-o outfile] [-d[dbgopts]] inputs\n", prog);
+    printf("\t-h\tPrint this help\n");
+    printf("\t-I path\tAdd 'path' to use search path\n");
+    printf("\t-d\tPrint debug dumps. Recognized options: f r p i\n");
+    printf("\t\t\tno options: print most common debug information\n");
+    printf("\t\t\tf: additionally log folded trees\n");
+    printf("\t\t\tl: additionally log lowered pre-cfg trees\n");
+    printf("\t\t\tT: additionally log tree immediately\n");
+    printf("\t\t\tr: additionally log register allocation activity\n");
+    printf("\t-o\tOutput to outfile\n");
+    printf("\t-S\tGenerate assembly instead of object code\n");
+}
+
+static void assem(char *f)
+{
+    char objfile[1024];
+    char cmd[1024];
+
+    swapsuffix(objfile, 1024, f, ".s", ".o");
+    snprintf(cmd, 1024, Asmcmd, objfile, f);
+    if (system(cmd) == -1)
+      die("Couldn't run assembler");
+}
+
+int main(int argc, char **argv)
+{
+    int opt;
+    int i;
+    Stab *globls;
+    char buf[1024];
+
+    while ((opt = getopt(argc, argv, "d::hSo:I:")) != -1) {
+        switch (opt) {
+            case 'o':
+                outfile = optarg;
+                break;
+            case 'h':
+                usage(argv[0]);
+                exit(0);
+                break;
+            case 'd':
+                debug = 1;
+                while (optarg && *optarg) {
+                    if (*optarg == 'y')
+                        yydebug = 1;
+                    debugopt[*optarg++ & 0x7f] = 1;
+                }
+                break;
+            case 'I':
+                lappend(&incpaths, &nincpaths, optarg);
+                break;
+            default:
+                usage(argv[0]);
+                exit(0);
+                break;
+        }
+    }
+
+    for (i = optind; i < argc; i++) {
+        globls = mkstab();
+        tyinit(globls);
+        tokinit(argv[i]);
+        file = mkfile(argv[i]);
+        file->file.exports = mkstab();
+        file->file.globls = globls;
+        yyparse();
+
+        /* before we do anything to the parse */
+        if (debugopt['T'])
+            dump(file, stdout);
+        infer(file);
+        /* after all processing */
+        if (debug)
+            dump(file, stdout);
+
+        swapsuffix(buf, 1024, argv[i], ".myr", ".s");
+        gen(file, buf);
+        assem(buf);
+    }
+
+    return 0;
+}
--- /dev/null
+++ b/6/platform.h
@@ -1,0 +1,9 @@
+#if defined(__APPLE__) && defined(__MACH__)
+/* for OSX */
+#   define Asmcmd "as -g -o %s %s"
+#   define Fprefix "_"
+#else
+/* Default to linux */
+#   define Asmcmd "as -g -o %s %s"
+#   define Fprefix ""
+#endif
--- /dev/null
+++ b/6/ra.c
@@ -1,0 +1,900 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+
+#include "parse.h"
+#include "opt.h"
+#include "asm.h"
+
+#define Sizetbits (CHAR_BIT*sizeof(size_t)) /* used in graph reprs */
+
+typedef struct Usage Usage;
+struct Usage {
+    int l[Maxarg + 1];
+    int r[Maxarg + 1];
+};
+
+static void printedge(FILE *fd, char *msg, size_t a, size_t b);
+
+Usage usetab[] = {
+#define Use(...) {__VA_ARGS__}
+#define Insn(i, fmt, use, def) use,
+#include "insns.def"
+#undef Insn
+#undef Use
+};
+
+Usage deftab[] = {
+#define Def(...) {__VA_ARGS__}
+#define Insn(i, fmt, use, def) def,
+#include "insns.def"
+#undef Insn
+#undef Def
+};
+
+Reg regmap[][Nmode] = {
+    [0]  = {Rnone, Ral, Rax, Reax, Rrax},
+    [1]  = {Rnone, Rcl, Rcx, Recx, Rrcx},
+    [2]  = {Rnone, Rdl, Rdx, Redx, Rrdx},
+    [3]  = {Rnone, Rbl, Rbx, Rebx, Rrbx},
+    [4]  = {Rnone, Rsil, Rsi, Resi, Rrsi},
+    [5]  = {Rnone, Rdil, Rdi, Redi, Rrdi},
+    [6]  = {Rnone, R8b, R8w, R8d, R8},
+    [7]  = {Rnone, R9b, R9w, R9d, R9},
+    [8]  = {Rnone, R10b, R10w, R10d, R10},
+    [9]  = {Rnone, R11b, R11w, R11d, R11},
+    [10]  = {Rnone, R12b, R12w, R12d, R12},
+    [11]  = {Rnone, R13b, R13w, R13d, R13},
+    [12]  = {Rnone, R14b, R14w, R14d, R14},
+    [13]  = {Rnone, R15b, R15w, R15d, R15},
+    [14]  = {Rnone, Rnone, Rnone, Resp},
+    [15]  = {Rnone, Rnone, Rnone, Rebp},
+};
+
+int colourmap[Nreg] = {
+    /* byte */
+    [Ral] = 0,
+    [Rcl] = 1,
+    [Rdl] = 2,
+    [Rbl] = 3,
+    [Rsil] = 4,
+    [Rdil] = 5,
+    [R8b] = 6,
+    [R9b] = 7,
+    [R10b] = 8,
+    [R11b] = 9,
+    [R12b] = 10,
+    [R13b] = 11,
+    [R14b] = 12,
+    [R15b] = 13,
+
+    /* word */
+    [Rax] = 0,
+    [Rcx] = 1,
+    [Rdx] = 2,
+    [Rbx] = 3,
+    [Rsi] = 4,
+    [Rdi] = 5,
+    [R8w] = 6,
+    [R9w] = 7,
+    [R10w] = 8,
+    [R11w] = 9,
+    [R12w] = 10,
+    [R13w] = 11,
+    [R14w] = 12,
+    [R15w] = 13,
+
+    /* dword */
+    [Reax] = 0,
+    [Recx] = 1,
+    [Redx] = 2,
+    [Rebx] = 3,
+    [Resi] = 4,
+    [Redi] = 5,
+    [R8d] = 6,
+    [R9d] = 7,
+    [R10d] = 8,
+    [R11d] = 9,
+    [R12d] = 10,
+    [R13d] = 11,
+    [R14d] = 12,
+    [R15d] = 13,
+
+    /* qword */
+    [Rrax] = 0,
+    [Rrcx] = 1,
+    [Rrdx] = 2,
+    [Rrbx] = 3,
+    [Rrsi] = 4,
+    [Rrdi] = 5,
+    [R8] = 6,
+    [R9] = 7,
+    [R10] = 8,
+    [R11] = 9,
+    [R12] = 10,
+    [R13] = 11,
+    [R14] = 12,
+    [R15] = 13,
+
+    [Rrsp] = 14,
+    [Rrbp] = 15,
+};
+
+/* %esp, %ebp are not in the allocatable pool */
+static int isfixreg(Loc *l)
+{
+    if (l->reg.colour == Resp)
+        return 1;
+    if (l->reg.colour == Rebp)
+        return 1;
+    return 0;
+}
+
+static size_t uses(Insn *insn, long *u)
+{
+    size_t i, j;
+    int k;
+    Loc *m;
+
+    j = 0;
+    /* Add all the registers used and defined. Duplicates
+     * in this list are fine, since they're being added to
+     * a set anyways */
+    for (i = 0; i < Maxarg; i++) {
+        if (!usetab[insn->op].l[i])
+            break;
+        k = usetab[insn->op].l[i] - 1;
+        /* non-registers are handled later */
+        if (insn->args[k]->type == Locreg)
+            if (!isfixreg(insn->args[k]))
+                u[j++] = insn->args[k]->reg.id;
+    }
+    /* some insns don't reflect their defs in the args.
+     * These are explictly listed in the insn description */
+    for (i = 0; i < Maxarg; i++) {
+        if (!usetab[insn->op].r[i])
+            break;
+        /* not a leak; physical registers get memoized */
+        u[j++] = locphysreg(usetab[insn->op].r[i])->reg.id;
+    }
+    /* If the registers are in an address calculation,
+     * they're used no matter what. */
+    for (i = 0; i < insn->nargs; i++) {
+        m = insn->args[i];
+        if (m->type != Locmem && m->type != Locmeml)
+            continue;
+        if (m->mem.base)
+            if (!isfixreg(m->mem.base))
+                u[j++] = m->mem.base->reg.id;
+        if (m->mem.idx)
+            if (!isfixreg(m->mem.base))
+                u[j++] = m->mem.idx->reg.id;
+    }
+    return j;
+}
+
+static size_t defs(Insn *insn, long *d)
+{
+    size_t i, j;
+    int k;
+
+    j = 0;
+    /* Add all the registers dsed and defined. Duplicates
+     * in this list are fine, since they're being added to
+     * a set anyways */
+    for (i = 0; i < Maxarg; i++) {
+        if (!deftab[insn->op].l[i])
+            break;
+        k = deftab[insn->op].l[i] - 1;
+        if (insn->args[k]->type == Locreg)
+            if (!isfixreg(insn->args[k]))
+                d[j++] = insn->args[k]->reg.id;
+    }
+    /* some insns don't reflect their defs in the args.
+     * These are explictly listed in the insn description */
+    for (i = 0; i < Maxarg; i++) {
+        if (!deftab[insn->op].r[i])
+            break;
+        /* not a leak; physical registers get memoized */
+        d[j++] = locphysreg(deftab[insn->op].r[i])->reg.id;
+    }
+    return j;
+}
+
+static void udcalc(Asmbb *bb)
+{
+    /* up to 2 registers per memloc, so
+     * 2*Maxarg is the maximum number of
+     * uses or defs we can see */
+    long   u[2*Maxarg], d[2*Maxarg];
+    size_t nu, nd;
+    size_t i, j;
+
+    bb->use = bsclear(bb->use);
+    bb->def = bsclear(bb->def);
+    for (i = 0; i < bb->ni; i++) {
+        nu = uses(bb->il[i], u);
+        nd = defs(bb->il[i], d);
+        for (j = 0; j < nu; j++)
+            if (!bshas(bb->def, u[j]))
+                bsput(bb->use, u[j]);
+        for (j = 0; j < nd; j++)
+            bsput(bb->def, d[j]);
+    }
+}
+
+static void liveness(Isel *s)
+{
+    Bitset *old;
+    Asmbb **bb;
+    size_t nbb;
+    size_t i, j;
+    int changed;
+
+    bb = s->bb;
+    nbb = s->nbb;
+    for (i = 0; i < nbb; i++) {
+        udcalc(s->bb[i]);
+        bb[i]->livein = bsclear(bb[i]->livein);
+        bb[i]->liveout = bsclear(bb[i]->liveout);
+    }
+
+    changed = 1;
+    while (changed) {
+        changed = 0;
+        for (i = 0; i < nbb; i++) {
+            old = bsdup(bb[i]->liveout);
+            /* liveout[b] = U(s in succ) livein[s] */
+            for (j = 0; bsiter(bb[i]->succ, &j); j++)
+                bsunion(bb[i]->liveout, bb[j]->livein);
+            /* livein[b] = use[b] U (out[b] \ def[b]) */
+            bb[i]->livein = bsclear(bb[i]->livein);
+            bsunion(bb[i]->livein, bb[i]->liveout);
+            bsdiff(bb[i]->livein, bb[i]->def);
+            bsunion(bb[i]->livein, bb[i]->use);
+            if (!changed)
+                changed = !bseq(old, bb[i]->liveout);
+        }
+    }
+}
+
+/* we're only interested in register->register moves */
+static int ismove(Insn *i)
+{
+    if (i->op != Imov)
+        return 0;
+    return i->args[0]->type == Locreg && i->args[1]->type == Locreg;
+}
+
+static int gbhasedge(Isel *s, size_t u, size_t v)
+{
+    size_t i;
+    i = (maxregid * v) + u;
+    return (s->gbits[i/Sizetbits] & (1ULL <<(i % Sizetbits))) != 0;
+}
+
+static void gbputedge(Isel *s, size_t u, size_t v)
+{
+    size_t i, j;
+
+    i = (maxregid * u) + v;
+    j = (maxregid * v) + u;
+    s->gbits[i/Sizetbits] |= 1ULL <<(i % Sizetbits);
+    s->gbits[j/Sizetbits] |= 1ULL <<(j % Sizetbits);
+    assert(gbhasedge(s, u, v) && gbhasedge(s, v, u));
+}
+
+static void addedge(Isel *s, size_t u, size_t v)
+{
+    if (u == v || gbhasedge(s, u, v))
+        return;
+    gbputedge(s, u, v);
+    gbputedge(s, v, u);
+    if (!bshas(s->prepainted, u)) {
+        bsput(s->gadj[u], v);
+        s->degree[u]++;
+    }
+    if (!bshas(s->prepainted, v)) {
+        bsput(s->gadj[v], u);
+        s->degree[v]++;
+    }
+}
+
+static void setup(Isel *s)
+{
+    Bitset **gadj;
+    size_t gchunks;
+    size_t i;
+
+    free(s->gbits);
+    gchunks = (maxregid*maxregid)/Sizetbits + 1;
+    s->gbits = zalloc(gchunks*sizeof(size_t));
+    /* fresh adj list repr. try to avoid reallocating all the bitsets */
+    gadj = zalloc(maxregid * sizeof(Bitset*));
+    if (s->gadj)
+        for (i = 0; i < maxregid; i++)
+            gadj[i] = bsclear(s->gadj[i]);
+    else
+        for (i = 0; i < maxregid; i++)
+            gadj[i] = mkbs();
+    free(s->gadj);
+    s->gadj = gadj;
+
+    s->spilled = bsclear(s->spilled);
+    s->coalesced = bsclear(s->coalesced);
+    /*
+    s->wlspill = bsclear(s->wlspill);
+    s->wlfreeze = bsclear(s->wlfreeze);
+    s->wlsimp = bsclear(s->wlsimp);
+    */
+
+    s->aliasmap = zalloc(maxregid * sizeof(size_t));
+    s->degree = zalloc(maxregid * sizeof(int));
+    s->rmoves = zalloc(maxregid * sizeof(Loc **));
+    s->nrmoves = zalloc(maxregid * sizeof(size_t));
+}
+
+static void build(Isel *s)
+{
+    long u[2*Maxarg], d[2*Maxarg];
+    size_t nu, nd;
+    size_t i, k;
+    ssize_t j;
+    Bitset *live;
+    Asmbb **bb;
+    size_t nbb;
+    Insn *insn;
+    size_t l;
+
+    /* set up convenience vars */
+    bb = s->bb;
+    nbb = s->nbb;
+
+    for (i = 0; i < nbb; i++) {
+        live = bsdup(bb[i]->liveout);
+        for (j = bb[i]->ni - 1; j >= 0; j--) {
+            insn = bb[i]->il[j];
+            nu = uses(insn, u);
+            nd = defs(insn, d);
+
+            /* moves get special treatment, since we don't want spurious
+             * edges between the src and dest */
+            if (ismove(insn)) {
+                /* live \= uses(i) */
+                for (k = 0; k < nu; k++)
+                    bsdel(live, u[k]);
+
+                for (k = 0; k < nu; k++)
+                    lappend(&s->rmoves[u[k]], &s->nrmoves[u[k]], insn);
+                for (k = 0; k < nd; k++)
+                    lappend(&s->rmoves[d[k]], &s->nrmoves[d[k]], insn);
+                lappend(&s->wlmove, &s->nwlmove, insn);
+            }
+            /* live = live U def(i) */
+            for (k = 0; k < nd; k++)
+                bsput(live, d[k]);
+
+            for (k = 0; k < nd; k++)
+                for (l = 0; bsiter(live, &l); l++)
+                    addedge(s, d[k], l);
+            /* live = use(i) U (live \ def(i)) */
+            for (k = 0; k < nd; k++)
+                bsdel(live, d[k]);
+            for (k = 0; k < nu; k++)
+                bsput(live, u[k]);
+        }
+    }
+}
+
+static int adjiter(Isel *s, regid n, regid *m)
+{
+    size_t i, r;
+
+    for (r = *m; bsiter(s->gadj[n], &r); r++) {
+        for (i = 0; i < s->nselstk; i++)
+            if (r == s->selstk[i]->reg.id)
+                goto next;
+        if (bshas(s->coalesced, r))
+            goto next;
+        assert(r < maxregid);
+        *m = r;
+        return 1;
+next:
+        continue;
+    }
+    return 0;
+}
+
+static size_t nodemoves(Isel *s, regid n, Insn ***pil)
+{
+    size_t i, j;
+    size_t count;
+
+    /* FIXME: inefficient. Do I care? */
+    count = 0;
+    if (pil)
+        *pil = NULL;
+    for (i = 0; i < s->nrmoves[n]; i++) {
+        for (j = 0; j < s->nmactive; j++) {
+            if (s->mactive[j] == s->rmoves[n][i]) {
+                if (pil)
+                    lappend(pil, &count, s->rmoves[n][i]);
+                continue;
+            }
+        }
+        for (j = 0; j < s->nwlmove; j++) {
+            if (s->wlmove[j] == s->rmoves[n][i]) {
+                if (pil)
+                    lappend(pil, &count, s->rmoves[n][i]);
+                continue;
+            }
+        }
+    }
+    return count;
+}
+
+static int moverelated(Isel *s, regid n)
+{
+    return nodemoves(s, n, NULL) != 0;
+}
+
+static void mkworklist(Isel *s)
+{
+    size_t i;
+
+    for (i = 0; i < maxregid; i++) {
+        if (bshas(s->prepainted, i))
+            continue;
+        else if (s->degree[i] >= K)
+            lappend(&s->wlspill, &s->nwlspill, locmap[i]);
+        else if (moverelated(s, i))
+            lappend(&s->wlfreeze, &s->nwlfreeze, locmap[i]);
+        else
+            lappend(&s->wlsimp, &s->nwlsimp, locmap[i]);
+    }
+}
+
+static void enablemove(Isel *s, regid n)
+{
+    size_t i, j;
+    Insn **il;
+    size_t ni;
+
+    ni = nodemoves(s, n, &il);
+    for (i = 0; i < ni; i++) {
+        for (j = 0; j < s->nmactive; j++) {
+            if (il[i] == s->mactive[j]) {
+                ldel(&s->mactive, &s->nmactive, j);
+                lappend(&s->wlmove, &s->nwlmove, il[i]);
+            }
+        }
+    }
+}
+
+static void decdegree(Isel *s, regid n)
+{
+    int d;
+    regid m;
+
+    assert(n < maxregid);
+    d = s->degree[n];
+    s->degree[n]--;
+
+    if (d == K) {
+        enablemove(s, n);
+        for (m = 0; adjiter(s, n, &m); m++)
+            enablemove(s, n);
+    }
+}
+
+static void simp(Isel *s)
+{
+    Loc *l;
+    regid m;
+
+    l = lpop(&s->wlsimp, &s->nwlsimp);
+    lappend(&s->selstk, &s->nselstk, l);
+    for (m = 0; adjiter(s, l->reg.id, &m); m++)
+        decdegree(s, m);
+}
+
+static regid getalias(Isel *s, regid id)
+{
+    while (1) {
+        if (!s->aliasmap[id])
+            break;
+        id = s->aliasmap[id]->reg.id;
+    };
+    return id;
+}
+
+static void wladd(Isel *s, regid u)
+{
+    size_t i;
+
+    if (bshas(s->prepainted, u))
+        return;
+    if (moverelated(s, u))
+        return;
+    if (s->degree[u] >= K)
+        return;
+    for (i = 0; i < s->nwlfreeze; i++)
+        if (s->wlfreeze[i]->reg.id == u)
+            ldel(&s->wlfreeze, &s->nwlfreeze, i);
+    lappend(&s->wlsimp, &s->nwlsimp, locmap[u]);
+}
+
+static int conservative(Isel *s, regid u, regid v)
+{
+    int k;
+    regid n;
+
+    k = 0;
+    for (n = 0; adjiter(s, u, &n); n++)
+        if (s->degree[n] >= K)
+            k++;
+    for (n = 0; adjiter(s, v, &n); n++)
+        if (s->degree[n] >= K)
+            k++;
+    return k < K;
+}
+
+/* FIXME: is this actually correct? */
+static int ok(Isel *s, regid t, regid r)
+{
+    if (s->degree[t] >= K)
+        return 0;
+    if (!bshas(s->prepainted, t))
+        return 0;
+    if (!gbhasedge(s, t, r))
+        return 0;
+    return 1;
+}
+
+static int combinable(Isel *s, regid u, regid v)
+{
+    regid t;
+
+    /* if u isn't prepainted, can we conservatively coalesce? */
+    if (!bshas(s->prepainted, u) && conservative(s, u, v))
+        return 1;
+
+    /* if it is, are the adjacent nodes ok to combine with this? */
+    for (t = 0; adjiter(s, u, &t); t++)
+        if (!ok(s, t, u))
+            return 0;
+    return 1;
+}
+
+static int wlhas(Loc **wl, size_t nwl, regid v, size_t *idx)
+{
+    size_t i;
+
+    for (i = 0; i < nwl; i++) {
+        if (wl[i]->reg.id == v) { 
+            *idx = i;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+static void combine(Isel *s, regid u, regid v)
+{
+    regid t;
+    size_t idx;
+    size_t i, j;
+    int has;
+
+    if (debugopt['r'])
+        printedge(stdout, "combining:", u, v);
+    if (wlhas(s->wlfreeze, s->nwlfreeze, v, &idx))
+        ldel(&s->wlfreeze, &s->nwlfreeze, idx);
+    else if (wlhas(s->wlspill, s->nwlspill, v, &idx))
+        ldel(&s->wlspill, &s->nwlspill, idx);
+    bsput(s->coalesced, v);
+    s->aliasmap[v] = locmap[u];
+
+    /* nodemoves[u] = nodemoves[u] U nodemoves[v] */
+    for (i = 0; i < s->nrmoves[v]; i++) {
+        has = 0;
+        for (j = 0; j < s->nrmoves[u]; j++) {
+            if (s->rmoves[v][i] == s->rmoves[u][j]) {
+                has = 1;
+                break;
+            }
+        }
+        if (!has)
+            lappend(&s->rmoves[u], &s->nrmoves[u], s->rmoves[v][i]);
+    }
+
+    for (t = 0; adjiter(s, v, &t); t++) {
+        if (debugopt['r'])
+            printedge(stdout, "combine-putedge:", v, t);
+        addedge(s, t, u);
+        decdegree(s, t);
+    }
+    if (s->degree[u] >= K && wlhas(s->wlfreeze, s->nwlfreeze, u, &idx)) {
+        ldel(&s->wlfreeze, &s->nwlfreeze, idx);
+        lappend(&s->wlspill, &s->nwlspill, locmap[u]);
+    }
+}
+
+static void coalesce(Isel *s)
+{
+    Insn *m;
+    regid u, v, tmp;
+
+    m = lpop(&s->wlmove, &s->nwlmove);
+    /* FIXME: src, dst? dst, src? Does it matter? */
+    u = getalias(s, m->args[0]->reg.id);
+    v = getalias(s, m->args[1]->reg.id);
+
+    if (bshas(s->prepainted, v)) {
+        tmp = u;
+        u = v;
+        v = tmp;
+    }
+
+    if (u == v) {
+        lappend(&s->mcoalesced, &s->nmcoalesced, m);
+        wladd(s, u);
+        wladd(s, v);
+    } else if (bshas(s->prepainted, v) || gbhasedge(s, u, v)) {
+        lappend(&s->mconstrained, &s->nmconstrained, m);
+        wladd(s, u);
+        wladd(s, v);
+    } else if (combinable(s, u, v)) {
+        lappend(&s->mcoalesced, &s->nmcoalesced, m);
+        combine(s, u, v);
+        wladd(s, u);
+    } else {
+        lappend(&s->mactive, &s->nmactive, m);
+    }
+}
+
+static int mldel(Insn ***ml, size_t *nml, Insn *m)
+{
+    size_t i;
+    for (i = 0; i < *nml; i++) {
+        if (m == (*ml)[i]) {
+            ldel(ml, nml, i);
+            return 1;
+        }
+    }
+    return 0;
+}
+
+static void freezemoves(Isel *s, Loc *u)
+{
+    size_t i;
+    Insn **ml;
+    Insn *m;
+    size_t nml;
+    size_t idx;
+    Loc *v;
+    
+    nml = nodemoves(s, u->reg.id, &ml);
+    for (i = 0; i < nml; i++) {
+        m = ml[i];
+        if (m->args[0] == u)
+            v = m->args[1];
+        else if (m->args[1] == u)
+            v = m->args[0];
+        else
+            continue;
+
+        if (!mldel(&s->mactive, &s->nmactive, m))
+            mldel(&s->wlmove, &s->nwlmove, m);
+        lappend(&s->mfrozen, &s->nmfrozen, m);
+        if (!nodemoves(s, v->reg.id, NULL) && s->degree[v->reg.id] < K) {
+            if (!wlhas(s->wlfreeze, s->nwlfreeze, v->reg.id, &idx))
+                die("Reg %zd not in freeze wl\n", v->reg.id);
+            ldel(&s->wlfreeze, &s->nwlfreeze, idx);
+            lappend(&s->wlsimp, &s->nwlsimp, v);
+        }
+
+    }
+    lfree(&ml, &nml);
+}
+
+static void freeze(Isel *s)
+{
+    Loc *l;
+
+    l = lpop(&s->wlfreeze, &s->nwlfreeze);
+    lappend(&s->wlsimp, &s->nwlsimp, l);
+    freezemoves(s, l);
+}
+
+static void selspill(Isel *s)
+{
+    size_t i;
+    Loc *m;
+
+    /* FIXME: pick a better heuristic for spilling */
+    m = NULL;
+    for (i = 0; i < s->nwlspill; i++) {
+        if (!bshas(s->spilled, s->wlspill[i]->reg.id)) {
+            m = s->wlspill[i];
+            ldel(&s->wlspill, &s->nwlspill, i);
+            break;
+        }
+    }
+    assert(m != NULL);
+    lappend(&s->wlsimp, &s->nwlsimp, m);
+    freezemoves(s, m);
+}
+
+static int paint(Isel *s)
+{
+    int taken[K + 2]; /* esp, ebp aren't "real colours" */
+    Loc *n, *w;
+    regid l;
+    size_t i;
+    int spilled;
+    int found;
+
+    spilled = 0;
+    while (s->nselstk) {
+        bzero(taken, K*sizeof(int));
+        n = lpop(&s->selstk, &s->nselstk);
+
+        for (l = 0; bsiter(s->gadj[n->reg.id], &l); l++) {
+            if (debugopt['r'] > 1)
+                printedge(stdout, "paint-edge:", n->reg.id, l);
+            w = locmap[getalias(s, l)];
+            if (w->reg.colour)
+                taken[colourmap[w->reg.colour]] = 1;
+        }
+
+        found = 0;
+        for (i = 0; i < K; i++) {
+            if (!taken[i]) {
+                if (debugopt['r']) {
+                    fprintf(stdout, "\tselecting ");
+                    locprint(stdout, n, 'x');
+                    fprintf(stdout, " = %s\n", regnames[regmap[i][n->mode]]);
+                }
+                n->reg.colour = regmap[i][n->mode];
+                found = 1;
+                break;
+            }
+        }
+        if (!found) {
+            spilled = 1;
+            bsput(s->spilled, n->reg.id);
+        }
+    }
+    for (l = 0; bsiter(s->coalesced, &l); l++) {
+        n = locmap[getalias(s, l)];
+        locmap[l]->reg.colour = n->reg.colour;
+    }
+    return spilled;
+}
+
+static void rewrite(Isel *s)
+{
+    die("Rewrite spills!");
+}
+
+void regalloc(Isel *s)
+{
+    size_t i;
+    int spilled;
+
+    s->prepainted = mkbs();
+    for (i = 0; i < maxregid; i++)
+        if (locmap[i]->reg.colour)
+            bsput(s->prepainted, i);
+    do {
+        setup(s);
+        liveness(s);
+        build(s);
+        mkworklist(s);
+        if (debugopt['r'])
+            dumpasm(s, stdout);
+        do {
+            if (s->nwlsimp)
+                simp(s);
+            else if (s->nwlmove)
+                coalesce(s);
+            else if (s->nwlfreeze)
+                freeze(s);
+            else if (s->nwlspill)
+                selspill(s);
+        } while (s->nwlsimp || s->nwlmove || s->nwlfreeze || s->nwlspill);
+        spilled = paint(s);
+        if (spilled)
+            rewrite(s);
+    } while (spilled);
+
+}
+
+static void setprint(FILE *fd, Bitset *s)
+{
+    char *sep;
+    size_t i;
+
+    sep = "";
+    for (i = 0; i < bsmax(s); i++) {
+        if (bshas(s, i)) {
+            fprintf(fd, "%s%zd", sep, i);
+            sep = ",";
+        }
+    }
+    fprintf(fd, "\n");
+}
+
+static void locsetprint(FILE *fd, Bitset *s)
+{
+    char *sep;
+    size_t i;
+
+    sep = "";
+    for (i = 0; i < bsmax(s); i++) {
+        if (bshas(s, i)) {
+            fprintf(fd, "%s", sep);
+            locprint(fd, locmap[i], 'x');
+            sep = ",";
+        }
+    }
+    fprintf(fd, "\n");
+}
+
+static void printedge(FILE *fd, char *msg, size_t a, size_t b)
+{
+    fprintf(fd, "\t%s ", msg);
+    locprint(fd, locmap[a], 'x');
+    fprintf(fd, " -- ");
+    locprint(fd, locmap[b], 'x');
+    fprintf(fd, "\n");
+}
+
+void dumpasm(Isel *s, FILE *fd)
+{
+    size_t i, j;
+    char *sep;
+    Asmbb *bb;
+
+    fprintf(fd, "IGRAPH ----- \n");
+    for (i = 0; i < maxregid; i++) {
+        for (j = i; j < maxregid; j++) {
+            if (gbhasedge(s, i, j))
+                printedge(stdout, "", i, j);
+        }
+    }
+    fprintf(fd, "ASM -------- \n");
+    for (j = 0; j < s->nbb; j++) {
+        bb = s->bb[j];
+        fprintf(fd, "\n");
+        fprintf(fd, "Bb: %d labels=(", bb->id);
+        sep = "";
+        for (i = 0; i < bb->nlbls; i++) {;
+            fprintf(fd, "%s%s", bb->lbls[i], sep);
+            sep = ",";
+        }
+        fprintf(fd, ")\n");
+
+        fprintf(fd, "Pred: ");
+        setprint(fd, bb->pred);
+        fprintf(fd, "Succ: ");
+        setprint(fd, bb->succ);
+
+        fprintf(fd, "Use: ");
+        locsetprint(fd, bb->use);
+        fprintf(fd, "Def: ");
+        locsetprint(fd, bb->def);
+        fprintf(fd, "Livein:  ");
+        locsetprint(fd, bb->livein);
+        fprintf(fd, "Liveout: ");
+        locsetprint(fd, bb->liveout);
+        for (i = 0; i < bb->ni; i++)
+            iprintf(fd, bb->il[i]);
+    }
+    fprintf(fd, "ENDASM -------- \n");
+}
--- /dev/null
+++ b/6/regs.def
@@ -1,0 +1,81 @@
+Reg(Rnone, "%NOREG", ModeB)
+/* byte regs */
+Reg(Ral, "%al", ModeB)
+Reg(Rcl, "%cl", ModeB)
+Reg(Rdl, "%dl", ModeB)
+Reg(Rbl, "%bl", ModeB)
+Reg(Rsil, "%sil", ModeB)
+Reg(Rdil, "%dil", ModeB)
+Reg(Rspl, "%spl", ModeB)
+Reg(Rbpl, "%bpl", ModeB)
+Reg(R8b, "%r8b", ModeB)
+Reg(R9b, "%r9b", ModeB)
+Reg(R10b, "%r10b", ModeB)
+Reg(R11b, "%r11b", ModeB)
+Reg(R12b, "%r12b", ModeB)
+Reg(R13b, "%r13b", ModeB)
+Reg(R14b, "%r14b", ModeB)
+Reg(R15b, "%r15b", ModeB)
+
+/* high byte regs. We *NEVER* allocate these */
+Reg(Rah, "%ah", ModeB)
+Reg(Rch, "%ch", ModeB)
+Reg(Rdh, "%dh", ModeB)
+Reg(Rbh, "%bh", ModeB)
+
+/* short regs */
+Reg(Rax, "%ax", ModeS)
+Reg(Rbx, "%bx", ModeS)
+Reg(Rcx, "%cx", ModeS)
+Reg(Rdx, "%dx", ModeS)
+Reg(Rsi, "%si", ModeS)
+Reg(Rdi, "%di", ModeS)
+Reg(Rsp, "%sp", ModeS)
+Reg(Rbp, "%bp", ModeS)
+Reg(R8w, "%r8w", ModeS)
+Reg(R9w, "%r9w", ModeS)
+Reg(R10w, "%r10w", ModeS)
+Reg(R11w, "%r11w", ModeS)
+Reg(R12w, "%r12w", ModeS)
+Reg(R13w, "%r13w", ModeS)
+Reg(R14w, "%r14w", ModeS)
+Reg(R15w, "%r15w", ModeS)
+
+
+/* long regs */
+Reg(Reax, "%eax", ModeL)
+Reg(Recx, "%ecx", ModeL)
+Reg(Redx, "%edx", ModeL)
+Reg(Rebx, "%ebx", ModeL)
+Reg(Resi, "%esi", ModeL)
+Reg(Redi, "%edi", ModeL)
+Reg(Resp, "%esp", ModeL)
+Reg(Rebp, "%ebp", ModeL)
+Reg(R8d, "%r8d", ModeL)
+Reg(R9d, "%r9d", ModeL)
+Reg(R10d, "%r10d", ModeL)
+Reg(R11d, "%r11d", ModeL)
+Reg(R12d, "%r12d", ModeL)
+Reg(R13d, "%r13d", ModeL)
+Reg(R14d, "%r14d", ModeL)
+Reg(R15d, "%r15d", ModeL)
+
+/* quad regs */
+Reg(Rrax, "%rax", ModeQ)
+Reg(Rrcx, "%rcx", ModeQ)
+Reg(Rrdx, "%rdx", ModeQ)
+Reg(Rrbx, "%rbx", ModeQ)
+Reg(Rrsi, "%rsi", ModeQ)
+Reg(Rrdi, "%rdi", ModeQ)
+Reg(Rrsp, "%rsp", ModeQ)
+Reg(Rrbp, "%rbp", ModeQ)
+Reg(R8, "%r8", ModeQ)
+Reg(R9, "%r9", ModeQ)
+Reg(R10, "%r10", ModeQ)
+Reg(R11, "%r11", ModeQ)
+Reg(R12, "%r12", ModeQ)
+Reg(R13, "%r13", ModeQ)
+Reg(R14, "%r14", ModeQ)
+Reg(R15, "%r15", ModeQ)
+
+Reg(Rrip, "%rip", ModeQ)
--- /dev/null
+++ b/6/simp.c
@@ -1,0 +1,1339 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "parse.h"
+#include "opt.h"
+#include "asm.h"
+
+#include "platform.h" /* HACK. We need some platform specific code gen behavior. *sigh.* */
+
+
+/* takes a list of nodes, and reduces it (and it's subnodes) to a list
+ * following these constraints:
+ *      - All nodes are expression nodes
+ *      - Nodes with side effects are root nodes
+ *      - All nodes operate on machine-primitive types and tuples
+ */
+typedef struct Simp Simp;
+struct Simp {
+    int isglobl;
+
+    Node **stmts;
+    size_t nstmts;
+
+    /* return handling */
+    Node *endlbl;
+    Node *ret;
+    int   isbigret;
+
+    /* pre/postinc handling */
+    Node **incqueue;
+    size_t nqueue;
+
+    /* location handling */
+    Node **blobs;
+    size_t nblobs;
+    size_t stksz;
+    size_t argsz;
+    Htab *globls;
+    Htab *locs;
+};
+
+static Node *simp(Simp *s, Node *n);
+static Node *rval(Simp *s, Node *n, Node *dst);
+static Node *lval(Simp *s, Node *n);
+static void declarelocal(Simp *s, Node *n);
+
+/* useful constants */
+static Type *tyintptr;
+static Type *tyvoid;
+
+static Type *base(Type *t)
+{
+    assert(t->nsub == 1);
+    return t->sub[0];
+}
+
+static Node *add(Node *a, Node *b)
+{
+    Node *n;
+
+    assert(size(a) == size(b));
+    n = mkexpr(a->line, Oadd, a, b, NULL);
+    n->expr.type = a->expr.type;
+    return n;
+}
+
+static Node *addk(Node *n, uvlong v)
+{
+    Node *k;
+
+    k = mkintlit(n->line, v);
+    k->expr.type = exprtype(n);
+    return add(n, k);
+}
+
+static Node *sub(Node *a, Node *b)
+{
+    Node *n;
+
+    n = mkexpr(a->line, Osub, a, b, NULL);
+    n->expr.type = a->expr.type;
+    return n;
+}
+
+static Node *subk(Node *n, uvlong v)
+{
+    Node *k;
+
+    k = mkintlit(n->line, v);
+    k->expr.type = exprtype(n);
+    return sub(n, k);
+}
+
+static Node *mul(Node *a, Node *b)
+{
+    Node *n;
+
+    n = mkexpr(a->line, Omul, a, b, NULL);
+    n->expr.type = a->expr.type;
+    return n;
+}
+
+static Node *addr(Node *a, Type *bt)
+{
+    Node *n;
+
+    n = mkexpr(a->line, Oaddr, a, NULL);
+    if (!bt)
+        n->expr.type = mktyptr(a->line, a->expr.type);
+    else
+        n->expr.type = mktyptr(a->line, bt);
+    return n;
+}
+
+static Node *load(Node *a)
+{
+    Node *n;
+
+    assert(a->expr.type->type == Typtr);
+    n = mkexpr(a->line, Oload, a, NULL);
+    n->expr.type = base(a->expr.type);
+    return n;
+}
+
+static Node *store(Node *a, Node *b)
+{
+    Node *n;
+
+    assert(a != NULL && b != NULL);
+    if (exprop(a) == Ovar)
+        n = mkexpr(a->line, Oset, a, b, NULL);
+    else
+        n = mkexpr(a->line, Ostor, a, b, NULL);
+    return n;
+}
+
+static Node *disp(int line, uint v)
+{
+    Node *n;
+
+    n = mkintlit(line, v);
+    n->expr.type = tyintptr;
+    return n;
+}
+
+static size_t did(Node *n)
+{
+    if (n->type == Ndecl) {
+        return n->decl.did;
+    } else if (n->type == Nexpr) {
+        assert(exprop(n) == Ovar);
+        return n->expr.did;
+    }
+    dump(n, stderr);
+    die("Can't get did");
+    return 0;
+}
+
+static ulong dclhash(void *dcl)
+{
+    /* large-prime hash. meh. */
+    return did(dcl) * 366787;
+}
+
+static int dcleq(void *a, void *b)
+{
+    return did(a) == did(b);
+}
+
+static void append(Simp *s, Node *n)
+{
+    lappend(&s->stmts, &s->nstmts, n);
+}
+
+static int ispure(Node *n)
+{
+    return ispureop[exprop(n)];
+}
+
+static int isconstfn(Node *s)
+{
+    return s->decl.isconst && decltype(s)->type == Tyfunc;
+}
+
+static char *asmname(Node *n)
+{
+    char *s;
+    int len;
+
+    len = strlen(Fprefix);
+    if (n->name.ns)
+        len += strlen(n->name.ns) + 1; /* +1 for separator */
+    len += strlen(n->name.name);
+
+    s = xalloc(len + 1);
+    s[0] = '\0';
+    sprintf(s, "%s", Fprefix);
+    if (n->name.ns)
+        sprintf(s, "%s%s$", s, n->name.ns);
+    sprintf(s, "%s%s", s, n->name.name);
+    return s;
+}
+
+int stacktype(Type *t)
+{
+    /* the types are arranged in types.def such that this is true */
+    t = tybase(t);
+    return t->type >= Tyslice;
+}
+
+int stacknode(Node *n)
+{
+    if (n->type == Nexpr)
+        return stacktype(n->expr.type);
+    else
+        return stacktype(n->decl.type);
+}
+
+size_t tysize(Type *t)
+{
+    size_t sz;
+    size_t i;
+
+    sz = 0;
+    if (!t)
+        die("size of empty type => bailing.");
+    switch (t->type) {
+        case Tyvoid:
+            die("void has no size");
+            return 1;
+        case Tybool: case Tyint8:
+        case Tybyte: case Tyuint8:
+            return 1;
+        case Tyint16: case Tyuint16:
+            return 2;
+        case Tyint: case Tyint32:
+        case Tyuint: case Tyuint32:
+        case Tychar:  /* utf32 */
+            return 4;
+
+        case Typtr: case Tyfunc:
+        case Tyvalist: /* ptr to first element of valist */
+            return Ptrsz;
+
+        case Tyint64: case Tylong:
+        case Tyuint64: case Tyulong:
+            return 8;
+
+            /*end integer types*/
+        case Tyfloat32:
+            return 4;
+        case Tyfloat64:
+            return 8;
+
+        case Tyslice:
+            return 2*Ptrsz; /* len; ptr */
+        case Tyalias:
+            return tysize(t->sub[0]);
+        case Tyarray:
+            assert(exprop(t->asize) == Olit);
+            return t->asize->expr.args[0]->lit.intval * tysize(t->sub[0]);
+        case Tytuple:
+            for (i = 0; i < t->nsub; i++)
+                sz += tysize(t->sub[i]);
+            return sz;
+            break;
+        case Tystruct:
+            for (i = 0; i < t->nmemb; i++)
+                sz += align(size(t->sdecls[i]), Ptrsz);
+            return sz;
+            break;
+        case Tyunion:
+            sz = Ptrsz;
+            for (i = 0; i < t->nmemb; i++)
+                if (t->udecls[i]->etype)
+                    sz = max(sz, tysize(t->udecls[i]->etype) + Ptrsz);
+            return align(sz, Ptrsz);
+            break;
+        case Tybad: case Tyvar: case Typaram: case Tyname: case Ntypes:
+            die("Type %s does not have size; why did it get down to here?", tystr(t));
+            break;
+    }
+    return -1;
+}
+
+size_t size(Node *n)
+{
+    Type *t;
+
+    if (n->type == Nexpr)
+        t = n->expr.type;
+    else
+        t = n->decl.type;
+    return tysize(t);
+}
+
+static Node *gentemp(Simp *simp, Node *e, Type *ty, Node **dcl)
+{
+    char buf[128];
+    static int nexttmp;
+    Node *t, *r, *n;
+
+    snprintf(buf, 128, ".t%d", nexttmp++);
+    n = mkname(e->line, buf);
+    t = mkdecl(e->line, n, ty);
+    r = mkexpr(e->line, Ovar, n, NULL);
+    r->expr.type = t->decl.type;
+    r->expr.did = t->decl.did;
+    if (dcl)
+        *dcl = t;
+    return r;
+}
+
+static Node *temp(Simp *simp, Node *e)
+{
+    Node *t, *dcl;
+
+    assert(e->type == Nexpr);
+    t = gentemp(simp, e, e->expr.type, &dcl);
+    declarelocal(simp, dcl);
+    return t;
+}
+
+static void jmp(Simp *s, Node *lbl)
+{
+    append(s, mkexpr(lbl->line, Ojmp, lbl, NULL));
+}
+
+static void cjmp(Simp *s, Node *cond, Node *iftrue, Node *iffalse)
+{
+    Node *jmp;
+
+    jmp = mkexpr(cond->line, Ocjmp, cond, iftrue, iffalse, NULL);
+    append(s, jmp);
+}
+
+/* if foo; bar; else baz;;
+ *      => cjmp (foo) :bar :baz */
+static void simpif(Simp *s, Node *n, Node *exit)
+{
+    Node *l1, *l2, *l3;
+    Node *iftrue, *iffalse;
+    Node *c;
+
+    l1 = genlbl();
+    l2 = genlbl();
+    if (exit)
+        l3 = exit;
+    else
+        l3 = genlbl();
+
+    iftrue = n->ifstmt.iftrue;
+    iffalse = n->ifstmt.iffalse;
+
+    c = rval(s, n->ifstmt.cond, NULL);
+    cjmp(s, c, l1, l2);
+    simp(s, l1);
+    simp(s, iftrue);
+    jmp(s, l3);
+    simp(s, l2);
+    /* because lots of bunched up end labels are ugly,
+     * coalesce them by handling 'elif'-like constructs
+     * separately */
+    if (iffalse && iffalse->type == Nifstmt) {
+        simpif(s, iffalse, exit);
+    } else {
+        simp(s, iffalse);
+        jmp(s, l3);
+    }
+
+    if (!exit)
+        simp(s, l3);
+}
+
+/* init; while cond; body;; 
+ *    => init
+ *       jmp :cond
+ *       :body
+ *           ...body...
+ *       :cond
+ *           ...cond...
+ *       cjmp (cond) :body :end
+ *       :end
+ */
+static void simploop(Simp *s, Node *n)
+{
+    Node *lbody;
+    Node *lend;
+    Node *lcond;
+    Node *t;
+
+    lbody = genlbl();
+    lcond = genlbl();
+    lend = genlbl();
+
+    simp(s, n->loopstmt.init);  /* init */
+    jmp(s, lcond);              /* goto test */
+    simp(s, lbody);             /* body lbl */
+    simp(s, n->loopstmt.body);  /* body */
+    simp(s, n->loopstmt.step);  /* step */
+    simp(s, lcond);             /* test lbl */
+    t = rval(s, n->loopstmt.cond, NULL);  /* test */
+    cjmp(s, t, lbody, lend);    /* repeat? */
+    simp(s, lend);              /* exit */
+}
+
+static Ucon *finducon(Node *n)
+{
+    size_t i;
+    Type *t;
+    Ucon *uc;
+
+    t = tybase(n->expr.type);
+    if (exprop(n) != Ocons)
+        return NULL;
+    for (i = 0; i  < t->nmemb; i++) {
+        uc = t->udecls[i];
+        if (!strcmp(namestr(uc->name), namestr(n->expr.args[0])))
+            return uc;
+    }
+    die("No ucon?!?");
+    return NULL;
+}
+
+static Node *uconid(Node *n, size_t off)
+{
+    Ucon *uc;
+
+    if (exprop(n) != Ocons)
+        return load(add(addr(n, tyintptr), disp(n->line, off)));
+
+    uc = finducon(n);
+    return disp(uc->line, uc->id);
+}
+
+static Node *uval(Node *n, size_t off, Type *t)
+{
+    if (exprop(n) == Ocons)
+        return n->expr.args[1];
+    else if (exprop(n) == Olit)
+        return n;
+    else
+        return load(addk(addr(n, t), off));
+}
+
+static Node *ucompare(Simp *s, Node *a, Node *b, Type *t, size_t off)
+{
+    Node *r, *v, *x, *y;
+    Ucon *uc;
+
+    assert(a->type == Nexpr);
+    t = tybase(t);
+    r = NULL;
+    switch (t->type) {
+        case Tyvoid: case Tybad: case Tyvalist: case Tyvar:
+        case Typaram: case Tyname: case Tyalias: case Ntypes:
+        case Tyint64: case Tyuint64: case Tylong:  case Tyulong:
+        case Tyfloat32: case Tyfloat64:
+        case Tyslice: case Tyarray: case Tytuple: case Tystruct:
+            die("Unsupported type for compare");
+            break;
+        case Tybool: case Tychar: case Tybyte:
+        case Tyint8: case Tyint16: case Tyint32: case Tyint:
+        case Tyuint8: case Tyuint16: case Tyuint32: case Tyuint:
+        case Typtr: case Tyfunc:
+            x = uval(a, off, t);
+            y = uval(b, off, t);
+            r = mkexpr(a->line, Oeq, x, y, NULL);
+            r->expr.type = tyintptr;
+            break;
+        case Tyunion:
+            x = uconid(a, off);
+            y = uconid(b, off);
+            uc = finducon(a);
+            if (!uc)
+                uc = finducon(b);
+
+            r = mkexpr(a->line, Oeq, x, y, NULL);
+            r->expr.type = tyintptr;
+            if (uc->etype) {
+                off += Ptrsz;
+                v = ucompare(s, a, b, uc->etype, off);
+                r = mkexpr(a->line, Oland, r, v, NULL);
+                r->expr.type = tyintptr;
+                r = rval(s, r, NULL); /* Oland needs to be reduced */
+            }
+            break;
+    }
+    return r;
+            
+}
+
+FILE *f;
+static void simpmatch(Simp *s, Node *n)
+{
+    Node *end, *cur, *next; /* labels */
+    Node *val, *cond; /* intermediates */
+    Node *m;
+    size_t i;
+    f = stdout;
+
+    end = genlbl();
+    val = rval(s, n->matchstmt.val, NULL); /* FIXME: don't recompute, even if pure */
+    for (i = 0; i < n->matchstmt.nmatches; i++) {
+        m = n->matchstmt.matches[i];
+
+        /* check pattern */
+        cond = ucompare(s, val, m->match.pat, val->expr.type, 0);
+        cur = genlbl();
+        next = genlbl();
+        cjmp(s, cond, cur, next);
+
+        /* do the action if it matches */
+        append(s, cur);
+        simp(s, m->match.block);
+        jmp(s, end);
+        append(s, next);
+    }
+    append(s, end);
+}
+
+static void simpblk(Simp *s, Node *n)
+{
+    size_t i;
+
+    for (i = 0; i < n->block.nstmts; i++) {
+        simp(s, n->block.stmts[i]);
+    }
+}
+
+static Node *lowerlit(Simp *s, Node *lit, Node ***l, size_t *nl)
+{
+    Node *n, *d, *r;
+    char lbl[128];
+
+    n = mkname(lit->line, genlblstr(lbl, 128));
+    d = mkdecl(lit->line, n, lit->expr.type);
+    r = mkexpr(lit->line, Ovar, n, NULL);
+
+    d->decl.init = lit;
+    d->decl.type = lit->expr.type;
+    d->decl.isconst = 1;
+    r->expr.did = d->decl.did;
+    r->expr.type = lit->expr.type;
+    if (tybase(r->expr.type)->type == Tyfunc)
+        r = addr(r, tybase(r->expr.type));
+
+    htput(s->globls, d, strdup(lbl));
+    lappend(l, nl, d);
+    return r;
+}
+
+static size_t offset(Node *aggr, Node *memb)
+{
+    Type *ty;
+    Node **nl;
+    size_t i;
+    size_t off;
+
+    ty = tybase(exprtype(aggr));
+    if (ty->type == Typtr)
+        ty = tybase(ty->sub[0]);
+
+    assert(ty->type == Tystruct);
+    nl = ty->sdecls;
+    off = 0;
+    for (i = 0; i < ty->nmemb; i++) {
+        if (!strcmp(namestr(memb), declname(nl[i])))
+            return off;
+        off += size(nl[i]);
+    }
+    die("Could not find member %s in struct", namestr(memb));
+    return -1;
+}
+
+static Node *ptrsized(Simp *s, Node *v)
+{
+    if (size(v) == Ptrsz)
+        return v;
+    else if (size(v) < Ptrsz)
+        v = mkexpr(v->line, Ozwiden, v, NULL);
+    else if (size(v) > Ptrsz)
+        v = mkexpr(v->line, Otrunc, v, NULL);
+    v->expr.type = tyintptr;
+    return v;
+}
+
+static Node *membaddr(Simp *s, Node *n)
+{
+    Node *t, *u, *r;
+    Node **args;
+    Type *ty;
+
+    args = n->expr.args;
+    ty = tybase(exprtype(args[0]));
+    if (ty->type == Typtr) {
+        t = args[0];
+    } else {
+        t = addr(args[0], exprtype(n));
+    }
+    u = disp(n->line, offset(args[0], args[1]));
+    r = add(t, u);
+    r->expr.type = mktyptr(n->line, n->expr.type);
+    return r;
+}
+
+static Node *idxaddr(Simp *s, Node *n)
+{
+    Node *a, *t, *u, *v; /* temps */
+    Node *r; /* result */
+    Node **args;
+    size_t sz;
+
+    assert(exprop(n) == Oidx);
+    args = n->expr.args;
+    a = rval(s, args[0], NULL);
+    if (exprtype(args[0])->type == Tyarray)
+        t = addr(a, exprtype(n));
+    else if (args[0]->expr.type->type == Tyslice)
+        t = load(addr(a, mktyptr(n->line, exprtype(n))));
+    else
+        die("Can't index type %s\n", tystr(n->expr.type));
+    assert(t->expr.type->type == Typtr);
+    u = rval(s, args[1], NULL);
+    u = ptrsized(s, u);
+    sz = size(n);
+    v = mul(u, disp(n->line, sz));
+    r = add(t, v);
+    return r;
+}
+
+static Node *slicebase(Simp *s, Node *n, Node *off)
+{
+    Node *t, *u, *v;
+    int sz;
+
+    t = rval(s, n, NULL);
+    u = NULL;
+    switch (exprtype(n)->type) {
+        case Typtr:     u = n; break;
+        case Tyarray:   u = addr(t, base(exprtype(n))); break;
+        case Tyslice:   u = load(addr(t, mktyptr(n->line, base(exprtype(n))))); break;
+        default: die("Unslicable type %s", tystr(n->expr.type));
+    }
+    /* safe: all types we allow here have a sub[0] that we want to grab */
+    off = ptrsized(s, off);
+    sz = tysize(n->expr.type->sub[0]);
+    v = mul(off, disp(n->line, sz));
+    return add(u, v);
+}
+
+static Node *slicelen(Simp *s, Node *sl)
+{
+    /* *(&sl + sizeof(size_t)) */
+    return load(addk(addr(sl, tyintptr), Ptrsz));
+}
+
+Node *lval(Simp *s, Node *n)
+{
+    Node *r;
+
+    switch (exprop(n)) {
+        case Ovar:      r = n;  break;
+        case Oidx:      r = idxaddr(s, n); break;
+        case Oderef:    r = rval(s, n->expr.args[0], NULL); break;
+        case Omemb:     r = membaddr(s, n); break;
+        default:
+            die("%s cannot be an lval", opstr(exprop(n)));
+            break;
+    }
+    return r;
+}
+
+static Node *simplazy(Simp *s, Node *n, Node *r)
+{
+    Node *a, *b;
+    Node *next;
+    Node *end;
+
+    next = genlbl();
+    end = genlbl();
+    a = rval(s, n->expr.args[0], NULL);
+    append(s, store(r, a));
+    if (exprop(n) == Oland)
+        cjmp(s, r, next, end);
+    else if (exprop(n) == Olor)
+        cjmp(s, a, end, next);
+    append(s, next);
+    b = rval(s, n->expr.args[1], NULL);
+    append(s, store(r, b));
+    append(s, end);
+    return r;
+}
+
+static Node *lowerslice(Simp *s, Node *n, Node *dst)
+{
+    Node *t;
+    Node *start, *end;
+    Node *base, *sz, *len;
+    Node *stbase, *stlen;
+
+    if (dst)
+        t = dst;
+    else
+        t = temp(s, n);
+    /* *(&slice) = (void*)base + off*sz */
+    base = slicebase(s, n->expr.args[0], n->expr.args[1]);
+    start = ptrsized(s, rval(s, n->expr.args[1], NULL));
+    end = ptrsized(s, rval(s, n->expr.args[2], NULL));
+    len = sub(end, start);
+    stbase = store(addr(t, tyintptr), base);
+    /* *(&slice + ptrsz) = len */
+    sz = addk(addr(t, tyintptr), Ptrsz);
+    stlen = store(sz, len);
+    append(s, stbase);
+    append(s, stlen);
+    return t;
+}
+
+static Node *lowercast(Simp *s, Node *n)
+{
+    Node **args;
+    Node *sz;
+    Node *r;
+    Type *t;
+    int issigned;
+    size_t fromsz, tosz;
+
+    issigned = 0;
+    r = NULL;
+    args = n->expr.args;
+    switch (tybase(exprtype(n))->type) {
+        case Tyint8: case Tyint16: case Tyint32: case Tyint64:
+        case Tyuint8: case Tyuint16: case Tyuint32: case Tyuint64:
+        case Tyint: case Tyuint: case Tylong: case Tyulong:
+        case Tychar: case Tybyte:
+        case Typtr:
+            t = tybase(exprtype(args[0]));
+            switch (t->type) {
+                case Tyslice:
+                    if (t->type == Typtr)
+                        fatal(n->line, "Bad cast from %s to %s",
+                              tystr(exprtype(args[0])), tystr(exprtype(n)));
+                    sz = mkintlit(n->line, Ptrsz);
+                    sz->expr.type = exprtype(args[0]);
+                    r = slicebase(s, args[0], sz);
+                    break;
+                case Tyint8: case Tyint16: case Tyint32: case Tyint64:
+                case Tyint: case Tylong:
+                    issigned = 1;
+                case Tyuint8: case Tyuint16: case Tyuint32: case Tyuint64:
+                case Tyuint: case Tyulong: case Tychar: case Tybyte:
+                case Typtr:
+                    fromsz = size(args[0]);
+                    tosz = size(n);
+                    r = rval(s, args[0], NULL);
+                    if (fromsz > tosz) {
+                        r = mkexpr(n->line, Otrunc, r, NULL);
+                    } else if (tosz > fromsz) {
+                        if (issigned)
+                            r = mkexpr(n->line, Oswiden, r, NULL);
+                        else
+                            r = mkexpr(n->line, Ozwiden, r, NULL);
+                    }
+                    r->expr.type = n->expr.type;
+                    break;
+                default:
+                    fatal(n->line, "Bad cast from %s to %s",
+                          tystr(exprtype(args[0])), tystr(exprtype(n)));
+            }
+            break;
+        default:
+            fatal(n->line, "Bad cast from %s to %s",
+                  tystr(exprtype(args[0])), tystr(exprtype(n)));
+    }
+    return r;
+}
+
+static Node *visit(Simp *s, Node *n)
+{
+    size_t i;
+    Node *r;
+
+    for (i = 0; i < n->expr.nargs; i++)
+        n->expr.args[i] = rval(s, n->expr.args[i], NULL);
+    if (ispure(n)) {
+        r = n;
+    } else {
+        if (exprtype(n)->type == Tyvoid) {
+            r = NULL;
+            append(s, n);
+        } else {
+            r = temp(s, n);
+            append(s, store(r, n));
+        }
+    }
+    return r;
+}
+
+Node *destructure(Simp *s, Node *lhs, Node *rhs)
+{
+    Node *plv, *prv, *lv, *sz, *stor, **args;
+    size_t off, i;
+
+    args = lhs->expr.args;
+    rhs = rval(s, rhs, NULL);
+    off = 0;
+    for (i = 0; i < lhs->expr.nargs; i++) {
+        lv = lval(s, args[i]);
+        prv = add(addr(rhs, exprtype(args[i])), disp(rhs->line, off));
+        if (stacknode(args[i])) {
+            sz = disp(lhs->line, size(lv));
+            plv = addr(lv, exprtype(lv));
+            stor = mkexpr(lhs->line, Oblit, plv, prv, sz, NULL);
+        } else {
+            stor = store(lv, load(prv));
+        }
+        append(s, stor);
+        off += size(lv);
+    }
+
+    return NULL;
+}
+
+Node *assign(Simp *s, Node *lhs, Node *rhs)
+{
+    Node *t, *u, *v, *r;
+
+    if (exprop(lhs) == Otup) {
+        r = destructure(s, lhs, rhs);
+    } else {
+        t = lval(s, lhs);
+        u = rval(s, rhs, t);
+
+        /* if we stored the result into t, rval() should return that,
+         * so we know our work is done. */
+        if (u == t) {
+            r = t;
+        } else if (stacknode(lhs)) {
+            t = addr(t, exprtype(lhs));
+            u = addr(u, exprtype(lhs));
+            v = disp(lhs->line, size(lhs));
+            r = mkexpr(lhs->line, Oblit, t, u, v, NULL);
+        } else if (exprop(lhs) == Ovar) {
+            r = mkexpr(lhs->line, Oset, t, u, NULL);
+        } else {
+            r = mkexpr(lhs->line, Ostor, t, u, NULL);
+        }
+    }
+    return r;
+}
+
+static Node *lowertup(Simp *s, Node *n, Node *dst)
+{
+    Node *pdst, *pval, *val, *sz, *stor, **args;
+    Node *r;
+    size_t i, off;
+
+    args = n->expr.args;
+    if (!dst)
+        dst = temp(s, n);
+    r = addr(dst, exprtype(dst));
+
+    off = 0;
+    for (i = 0; i < n->expr.nargs; i++) {
+        val = rval(s, args[i], NULL);
+        pdst = add(r, disp(n->line, off));
+        if (stacknode(args[i])) {
+            sz = disp(n->line, size(val));
+            pval = addr(val, exprtype(val));
+            stor = mkexpr(n->line, Oblit, pdst, pval, sz, NULL);
+        } else {
+            stor = store(pdst, val);
+        }
+        append(s, stor);
+        off += size(args[i]);
+    }
+    return dst;
+}
+
+static Node *lowerucon(Simp *s, Node *n, Node *dst)
+{
+    Node *tmp, *u, *tag, *elt, *sz;
+    Node *r;
+    Type *ty;
+    Ucon *uc;
+    size_t i;
+
+    /* find the ucon we're constructing here */
+    ty = tybase(n->expr.type);
+    uc = NULL;
+    for (i = 0; i < ty->nmemb; i++) {
+        if (!strcmp(namestr(n->expr.args[0]), namestr(ty->udecls[i]->name))) {
+            uc = ty->udecls[i];
+            break;
+        }
+    }
+    if (!uc)
+        die("Couldn't find union constructor");
+
+    if (dst)
+        tmp = dst;
+    else
+        tmp = temp(s, n);
+    u = addr(tmp, exprtype(n));
+    tag = disp(n->line, uc->id);
+    append(s, store(u, tag));
+    if (!uc->etype)
+        return tmp;
+
+    elt = rval(s, n->expr.args[1], NULL);
+    u = addk(u, Ptrsz);
+    if (stacktype(uc->etype)) {
+        elt = addr(elt, uc->etype);
+        sz = disp(n->line, tysize(uc->utype));
+        r = mkexpr(n->line, Oblit, u, elt, sz, NULL);
+    } else {
+        r = store(u, elt);
+    }
+    append(s, r);
+    return tmp;
+}
+
+static Node *rval(Simp *s, Node *n, Node *dst)
+{
+    Node *r; /* expression result */
+    Node *t, *u, *v; /* temporary nodes */
+    Node **args;
+    size_t i;
+    const Op fusedmap[Numops] = {
+        [Oaddeq]        = Oadd,
+        [Osubeq]        = Osub,
+        [Omuleq]        = Omul,
+        [Odiveq]        = Odiv,
+        [Omodeq]        = Omod,
+        [Oboreq]        = Obor,
+        [Obandeq]       = Oband,
+        [Obxoreq]       = Obxor,
+        [Obsleq]        = Obsl,
+        [Obsreq]        = Obsr,
+    };
+
+
+    r = NULL;
+    args = n->expr.args;
+    switch (exprop(n)) {
+        case Obad:
+        case Olor: case Oland:
+            r = temp(s, n);
+            simplazy(s, n, r);
+            break;
+        case Osize:
+            r = mkintlit(n->line, size(args[0]));
+            r->expr.type = exprtype(n);
+            break;
+        case Oslice:
+            r = lowerslice(s, n, dst);
+            break;
+        case Oidx:
+            t = idxaddr(s, n);
+            r = load(t);
+            break;
+        case Omemb:
+            if (exprtype(args[0])->type == Tyslice) {
+                assert(!strcmp(namestr(args[1]), "len"));
+                r = slicelen(s, args[0]);
+            } else if (exprtype(args[0])->type == Tyarray) {
+                assert(!strcmp(namestr(args[1]), "len"));
+                r = exprtype(args[0])->asize;
+            } else {
+                t = membaddr(s, n);
+                r = load(t);
+            }
+            break;
+        case Ocons:
+            r = lowerucon(s, n, dst);
+            break;
+        case Otup:
+            r = lowertup(s, n, dst);
+            break;
+        case Ocast:
+            /* slice -> ptr cast */
+            r = lowercast(s, n);
+            break;
+
+        /* fused ops:
+         * foo ?= blah
+         *    =>
+         *     foo = foo ? blah*/
+        case Oaddeq: case Osubeq: case Omuleq: case Odiveq: case Omodeq:
+        case Oboreq: case Obandeq: case Obxoreq: case Obsleq: case Obsreq:
+            assert(fusedmap[exprop(n)] != Obad);
+            u = rval(s, args[0], NULL);
+            v = rval(s, args[1], NULL);
+            v = mkexpr(n->line, fusedmap[exprop(n)], u, v, NULL);
+            v->expr.type = u->expr.type;
+            r = store(u, v);
+            break;
+
+        /* ++expr(x)
+         *  => args[0] = args[0] + 1
+         *     expr(x) */
+        case Opreinc:
+            v = assign(s, args[0], addk(args[0], 1));
+            append(s, v);
+            r = rval(s, args[0], NULL);
+            break;
+        case Opredec:
+            v = assign(s, args[0], subk(args[0], 1));
+            append(s, v);
+            r = rval(s, args[0], NULL);
+            break;
+
+        /* expr(x++)
+         *     =>
+         *      expr
+         *      x = x + 1
+         */
+        case Opostinc:
+            r = rval(s, args[0], NULL);
+            t = store(lval(s, args[0]), addk(r, 1));
+            lappend(&s->incqueue, &s->nqueue, t);
+            break;
+        case Opostdec:
+            r = rval(s, args[0], NULL);
+            t = store(lval(s, args[0]), subk(r, 1));
+            lappend(&s->incqueue, &s->nqueue, t);
+            break;
+        case Olit:
+            switch (args[0]->lit.littype) {
+                case Lchr: case Lbool: case Lint:
+                    r = n;
+                    break;
+                case Lstr: case Lseq: case Lflt:
+                    r = lowerlit(s, n, &s->blobs, &s->nblobs);
+                    break;
+                case Lfunc:
+                    r = lowerlit(s, n, &file->file.stmts, &file->file.nstmts);
+                    break;
+            }
+            break;
+        case Ovar:
+            r = n;
+            break;
+        case Oret:
+            if (s->isbigret) {
+                t = rval(s, args[0], NULL);
+                t = addr(t, exprtype(args[0]));
+                u = disp(n->line, size(args[0]));
+                v = mkexpr(n->line, Oblit, s->ret, t, u, NULL);
+                append(s, v);
+            } else if (n->expr.nargs && n->expr.args[0]) {
+                t = s->ret;
+                t = store(t, rval(s, args[0], NULL));
+                append(s, t);
+            }
+            jmp(s, s->endlbl);
+            break;
+        case Oasn:
+            r = assign(s, args[0], args[1]);
+            break;
+        case Ocall:
+            if (exprtype(n)->type != Tyvoid && size(n) > Ptrsz) {
+                r = temp(s, n);
+                linsert(&n->expr.args, &n->expr.nargs, 1, addr(r, exprtype(n)));
+                for (i = 0; i < n->expr.nargs; i++)
+                    n->expr.args[i] = rval(s, n->expr.args[i], NULL);
+                append(s, n);
+            } else {
+                r = visit(s, n);
+            }
+            break;
+        case Oaddr:
+            t = lval(s, args[0]);
+            if (exprop(t) == Ovar) /* Ovar is the only one that doesn't return the address directly */
+                r = addr(t, exprtype(t));
+            else
+                r = t;
+            break;
+        default:
+            r = visit(s, n);
+    }
+    return r;
+}
+
+static void declarelocal(Simp *s, Node *n)
+{
+    assert(n->type == Ndecl);
+    s->stksz += size(n);
+    s->stksz = align(s->stksz, min(size(n), Ptrsz));
+    if (debug)
+        printf("declare %s:%s(%zd) at %zd\n", declname(n), tystr(decltype(n)), n->decl.did, s->stksz);
+    htput(s->locs, n, (void*)s->stksz);
+}
+
+static void declarearg(Simp *s, Node *n)
+{
+    assert(n->type == Ndecl);
+    s->argsz = align(s->argsz, min(size(n), Ptrsz));
+    if (debug)
+        printf("declare %s(%zd) at %zd\n", declname(n), n->decl.did, -(s->argsz + 2*Ptrsz));
+    htput(s->locs, n, (void*)-(s->argsz + 2*Ptrsz));
+    s->argsz += size(n);
+}
+
+static Node *simp(Simp *s, Node *n)
+{
+    Node *r, *t, *u;
+    size_t i;
+
+    if (!n)
+        return NULL;
+    r = NULL;
+    switch (n->type) {
+        case Nlit:       r = n;                 break;
+        case Nlbl:       append(s, n);          break;
+        case Nblock:     simpblk(s, n);         break;
+        case Nifstmt:    simpif(s, n, NULL);    break;
+        case Nloopstmt:  simploop(s, n);        break;
+        case Nmatchstmt: simpmatch(s, n);       break;
+        case Nexpr:
+            r = rval(s, n, NULL);
+            if (r)
+                append(s, r);
+            /* drain the increment queue for this expr */
+            for (i = 0; i < s->nqueue; i++)
+                append(s, s->incqueue[i]);
+            lfree(&s->incqueue, &s->nqueue);
+            break;
+
+        case Ndecl:
+            declarelocal(s, n);
+            if (n->decl.init) {
+                t = mkexpr(n->line, Ovar, n->decl.name, NULL);
+                u = mkexpr(n->line, Oasn, t, n->decl.init, NULL);
+                u->expr.type = n->decl.type;
+                t->expr.type = n->decl.type;
+                t->expr.did = n->decl.did;
+                simp(s, u);
+            }
+            break;
+        default:
+            die("Bad node passsed to simp()");
+            break;
+    }
+    return r;
+}
+
+static void flatten(Simp *s, Node *f)
+{
+    Node *dcl;
+    Type *ty;
+    size_t i;
+
+    assert(f->type == Nfunc);
+    s->nstmts = 0;
+    s->stmts = NULL;
+    s->endlbl = genlbl();
+    s->ret = NULL;
+
+    assert(f->type == Nfunc);
+
+    ty = f->func.type->sub[0];
+    if (stacktype(ty)) {
+        s->isbigret = 1;
+        s->ret = gentemp(s, f, mktyptr(f->line, ty), &dcl);
+        declarearg(s, dcl);
+    } else if (ty->type != Tyvoid) {
+        s->isbigret = 0;
+        s->ret = gentemp(s, f, ty, &dcl);
+        declarelocal(s, dcl);
+    }
+
+    for (i = 0; i < f->func.nargs; i++) {
+      declarearg(s, f->func.args[i]);
+    }
+    simp(s, f->func.body);
+
+    append(s, s->endlbl);
+}
+
+static Func *lowerfn(Simp *s, char *name, Node *n, int export)
+{
+    size_t i;
+    Func *fn;
+    Cfg *cfg;
+
+    if(debug)
+        printf("\n\nfunction %s\n", name);
+
+    if (!n->decl.init)
+        return NULL;
+    /* set up the simp context */
+    /* unwrap to the function body */
+    n = n->expr.args[0];
+    n = n->lit.fnval;
+    flatten(s, n);
+
+    if (debug)
+        for (i = 0; i < s->nstmts; i++)
+            dump(s->stmts[i], stdout);
+    for (i = 0; i < s->nstmts; i++) {
+        if (s->stmts[i]->type != Nexpr)
+            continue;
+        if (debugopt['f']) {
+            printf("FOLD FROM ----------\n");
+            dump(s->stmts[i], stdout);
+        }
+        s->stmts[i] = fold(s->stmts[i]);
+        if (debugopt['f']) {
+            printf("FOLD TO ------------\n");
+            dump(s->stmts[i], stdout);
+            printf("END ----------------\n");
+        }
+    }
+    cfg = mkcfg(s->stmts, s->nstmts);
+    if (debug)
+        dumpcfg(cfg, stdout);
+
+    fn = zalloc(sizeof(Func));
+    fn->name = strdup(name);
+    fn->isexport = export;
+    fn->stksz = align(s->stksz, 8);
+    fn->locs = s->locs;
+    fn->ret = s->ret;
+    fn->cfg = cfg;
+    return fn;
+}
+
+static void fillglobls(Stab *st, Htab *globls)
+{
+    void **k;
+    size_t i, nk;
+    Stab *stab;
+    Node *s;
+
+    k = htkeys(st->dcl, &nk);
+    for (i = 0; i < nk; i++) {
+        s = htget(st->dcl, k[i]);
+        htput(globls, s, asmname(s->decl.name));
+    }
+    free(k);
+
+    k = htkeys(st->ns, &nk);
+    for (i = 0; i < nk; i++) {
+        stab = htget(st->ns, k[i]);
+        fillglobls(stab, globls);
+    }
+    free(k);
+}
+
+static void lowerdcl(Node *dcl, Htab *globls, Func ***fn, size_t *nfn, Node ***blob, size_t *nblob)
+{
+    Simp s = {0,};
+    char *name;
+    Func *f;
+
+    name = asmname(dcl->decl.name);
+    s.locs = mkht(dclhash, dcleq);
+    s.globls = globls;
+    s.blobs = *blob;
+    s.nblobs = *nblob;
+
+    if (isconstfn(dcl)) {
+        if (!dcl->decl.isextern && !dcl->decl.isgeneric) {
+            f = lowerfn(&s, name, dcl->decl.init, dcl->decl.isexport);
+            lappend(fn, nfn, f);
+        }
+    } else {
+        dcl->decl.init = fold(dcl->decl.init);
+        if (dcl->decl.init && exprop(dcl->decl.init) == Olit)
+            lappend(&s.blobs, &s.nblobs, dcl);
+        /* uninitialized global vars get zero-initialized decls */
+        else if (!dcl->decl.isconst && !dcl->decl.init)
+            lappend(&s.blobs, &s.nblobs, dcl);
+        else
+            die("We don't lower globls with nonlit inits yet...");
+    }
+    *blob = s.blobs;
+    *nblob = s.nblobs;
+    free(name);
+}
+
+void gen(Node *file, char *out)
+{
+    Htab *globls;
+    Node *n, **blob;
+    Func **fn;
+    size_t nfn, nblob;
+    size_t i;
+    FILE *fd;
+
+    /* declare useful constants */
+    tyintptr = mkty(-1, Tyuint64);
+    tyvoid = mkty(-1, Tyvoid);
+
+    fn = NULL;
+    nfn = 0;
+    blob = NULL;
+    nblob = 0;
+    globls = mkht(dclhash, dcleq);
+
+    /* We need to define all global variables before use */
+    fillglobls(file->file.globls, globls);
+
+    for (i = 0; i < file->file.nstmts; i++) {
+        n = file->file.stmts[i];
+        switch (n->type) {
+            case Nuse: /* nothing to do */ 
+                break;
+            case Ndecl:
+                lowerdcl(n, globls, &fn, &nfn, &blob, &nblob);
+                break;
+            default:
+                die("Bad node %s in toplevel", nodestr(n->type));
+                break;
+        }
+    }
+
+    fd = fopen(out, "w");
+    if (!fd)
+        die("Couldn't open fd %s", out);
+
+    fprintf(fd, ".data\n");
+    for (i = 0; i < nblob; i++)
+        genblob(fd, blob[i], globls);
+    fprintf(fd, ".text\n");
+    for (i = 0; i < nfn; i++)
+        genasm(fd, fn[i], globls);
+    fclose(fd);
+}