shithub: mc

Download patch

ref: 5757b833cae9a9e60d62248128a8225119e238ff
parent: 66de4498b2b94e9fde735c7c3c1c19e2f5a362ce
parent: c5d3f887bcb243a46e0ea11edbe2851844f79d73
author: Ori Bernstein <[email protected]>
date: Thu Jun 14 23:52:59 EDT 2012

Merge branch 'nicer-ra'

Conflicts:
	8/asm.h
	8/isel.c
	8/regalloc.c

--- a/8/Makefile
+++ b/8/Makefile
@@ -1,12 +1,11 @@
 BIN=8m
 OBJ=isel.o \
     main.o \
+    ra.o \
     reduce.o \
     regalloc.o \
 
-CFLAGS+=-I../parse -I../opt
-LDFLAGS+=-L../parse -lparse -L../opt -lopt
-EXTRADEP=../parse/libparse.a ../opt/libopt.a
+DEPS=../parse/libparse.a ../opt/libopt.a
 
 include ../mk/lexyacc.mk
 include ../mk/c.mk
--- a/8/asm.h
+++ b/8/asm.h
@@ -1,7 +1,17 @@
-#define MaxArg 4
+#define Maxarg 4
+#define K 4 /* 4 general purpose regs with all modes available */
 
+typedef size_t regid;
+
+typedef struct Insn Insn;
+typedef struct Loc Loc;
+typedef struct Func Func;
+typedef struct Blob Blob;
+typedef struct Isel Isel;
+typedef struct Asmbb Asmbb;
+
 typedef enum {
-#define Insn(val, fmt, attr) val,
+#define Insn(val, fmt, use, def) val,
 #include "insns.def"
 #undef Insn
 } AsmOp;
@@ -17,7 +27,6 @@
     Locnone,
     Loclbl,  /* label */
     Locreg,  /* register */
-    Locpseu, /* pseudo-reg */
     Locmem,  /* reg offset mem */
     Locmeml, /* label offset mem */
     Loclit,
@@ -33,12 +42,6 @@
     Nmode,
 } Mode;
 
-typedef struct Insn Insn;
-typedef struct Loc Loc;
-typedef struct Func Func;
-typedef struct Blob Blob;
-typedef struct Isel Isel;
-
 struct Blob {
     char *name; /* mangled asm name */
     void *data;
@@ -50,8 +53,10 @@
     Mode mode;
     union {
         char *lbl;
-        Reg   reg;
-        long  pseudo;
+        struct {
+            regid id;
+            Reg   colour;
+        } reg;
         long  lit;
         /* disp(base + index) */
         struct {
@@ -58,9 +63,9 @@
             /* only one of lbldisp and constdisp may be used */
             char *lbldisp;
             long constdisp;
-            int scale;
-            Reg base;
-            Reg idx;
+            int scale; /* 0,1,2,4, or 8 */
+            Loc *base; /* needed */
+            Loc *idx;  /* optional */
         } mem;
     };
 };
@@ -67,8 +72,8 @@
 
 struct Insn {
     AsmOp op;
-    Loc args[MaxArg];
-    int narg;
+    Loc *args[Maxarg];
+    size_t nargs;
 };
 
 struct Func {
@@ -77,56 +82,114 @@
     size_t stksz;
     Htab *locs;
     Node *ret;
-    Node **nl;
-    size_t nn;
+    Cfg  *cfg;
 };
 
-/* instruction selection state */
-struct Isel {
+struct Asmbb {
+    int id;
+    char **lbls;
+    size_t nlbls;
     Insn **il;
     size_t ni;
-    Node *ret;
-    Htab *locs; /* decl id => int stkoff */
-    Htab *globls; /* decl id => char *globlname */
 
-    /* register and spill states */
-    size_t stksz;
-    Loc stkszloc;
-    int rtaken[Nreg];
-    Node *rcontains[Nreg];
-    Loc *locmap;
+    Bitset *pred;
+    Bitset *succ;
+    Bitset *use;
+    Bitset *def;
+    Bitset *livein;
+    Bitset *liveout;
 };
 
+
+/* instruction selection state */
+struct Isel {
+    Cfg  *cfg;          /* cfg built with nodes */
+
+    Asmbb **bb;         /* 1:1 mappings with the Node bbs in the CFG */
+    size_t nbb;
+    Asmbb *curbb;
+
+    Node *ret;          /* we store the return into here */
+    Htab *locs;         /* decl id => int stkoff */
+    Htab *globls;       /* decl id => char *globlname */
+
+    /* increased when we spill */
+    Loc *stksz;
+
+    /* register allocator state */
+    Bitset *prepainted; /* locations that need to be a specific colour */
+
+    size_t *gbits;      /* igraph matrix repr */
+    Bitset **gadj;      /* igraph adj set repr */
+    int *degree;        /* degree of nodes */
+    Loc **aliasmap;   /* mapping of aliases */
+
+    Loc  **selstk;
+    size_t nselstk;
+
+    Bitset *coalesced;
+    Bitset *spilled;
+
+    Insn ***rmoves;
+    size_t *nrmoves;
+
+    /* move sets */
+    Insn **mcoalesced;
+    size_t nmcoalesced;
+
+    Insn **mconstrained;
+    size_t nmconstrained;
+
+    Insn **mfrozen;
+    size_t nmfrozen;
+
+    Insn **mactive;
+    size_t nmactive;
+
+
+    /* worklists */
+    Insn **wlmove;
+    size_t nwlmove;
+
+    Loc **wlspill;
+    size_t nwlspill;
+
+    Loc **wlfreeze;
+    size_t nwlfreeze;
+
+    Loc **wlsimp;
+    size_t nwlsimp;
+};
+
 /* entry points */
 void genasm(FILE *fd, Func *fn, Htab *globls);
 void gen(Node *file, char *out);
 
 /* location generation */
-Loc *loclbl(Loc *l, Node *lbl);
-Loc *locstrlbl(Loc *l, char *lbl);
-Loc *locreg(Loc *l, Reg r);
-Loc *locmem(Loc *l, long disp, Reg base, Reg idx, Mode mode);
-Loc *locmeml(Loc *l, char *disp, Reg base, Reg idx, Mode mode);
-Loc *locmems(Loc *l, long disp, Reg base, Reg idx, int scale, Mode mode);
-Loc *locmemls(Loc *l, char *disp, Reg base, Reg idx, int scale, Mode mode);
-Loc *loclit(Loc *l, long val);
+extern size_t maxregid;
+extern Loc **locmap; /* mapping from reg id => Loc * */
+
+Loc *loclbl(Node *lbl);
+Loc *locstrlbl(char *lbl);
+Loc *locreg(Mode m);
+Loc *locphysreg(Reg r);
+Loc *locmem(long disp, Loc *base, Loc *idx, Mode mode);
+Loc *locmeml(char *disp, Loc *base, Loc *idx, Mode mode);
+Loc *locmems(long disp, Loc *base, Loc *idx, int scale, Mode mode);
+Loc *locmemls(char *disp, Loc *base, Loc *idx, int scale, Mode mode);
+Loc *loclit(long val);
+
 void locprint(FILE *fd, Loc *l);
 void iprintf(FILE *fd, Insn *insn);
 
 /* register allocation */
-Loc getreg(Isel *s, Mode m);
-Loc claimreg(Isel *s, Reg r);
-void freeloc(Isel *s, Loc l);
-void freereg(Isel *s, Reg r);
-void in(Isel *s, Node *n, Loc l);
-extern const char *regnames[];
-extern const Mode regmodes[];
+extern char *regnames[]; /* name table */
+extern Mode regmodes[];  /* mode table */
 
-/* code gen */
-void spill(Isel *s, Reg r);
-void g(Isel *s, AsmOp op, ...);
+void regalloc(Isel *s);
 
+
 /* useful functions */
 size_t size(Node *n);
-size_t tysize(Type *t);
 void breakhere();
+void dumpasm(Isel *s, FILE *fd);
--- a/8/insns.def
+++ b/8/insns.def
@@ -6,7 +6,7 @@
         %m            - A memory location.
         %l            - A location (either register or memory)
         %x            - Any value.
-        %[0-9]*t      - Mode of an operand. The optional number
+        %[1-9]*t      - Mode of an operand. The optional number
                         preceeding it is the operand desired for
                         the mode.
         %v            - a value (ie, immediate integer or label)
@@ -13,52 +13,54 @@
     Currently, there aren't any attrs, because none were needed yet.
     Eventually, they'll probably include flag setting and so on.
 
-   */
+    For technical reasons, the indexing on use and def statments is 1-based,
+    instead of 0-based. (0 is the sentinel value).
+*/
 
 /* Note, the mov instruction is specified in an overly general manner. */
-Insn(Inone,     "BAD_INSN",                     0)
-Insn(Imov,      "\tmov%t %x,%x\n",              0)
-Insn(Imovz,     "\tmovz%0t%1t %x,%x\n",         0)
-Insn(Imovs,     "\tmovs%0t%1t %x,%x\n",         0)
-Insn(Ilea,      "\tlea%t %x,%x\n",              0)
+Insn(Inone,     "BAD_INSN",                     Use(), Def())
+Insn(Imov,      "\tmov%t %x,%x\n", 		Use(.l={1}),			Def(.l={2}))
+Insn(Imovz,     "\tmovz%1t%2t %x,%x\n",         Use(.l={1}),			Def(.l={2}))
+Insn(Imovs,     "\tmovs%1t%2t %x,%x\n",         Use(.l={1}),			Def(.l={2}))
+Insn(Ilea,      "\tlea%t %x,%x\n",              Use(.l={1}),			Def(.l={2}))
 
-Insn(Iadd,      "\tadd%t %r,%x\n",              0)
-Insn(Isub,      "\tsub%t %r,%x\n",              0)
-Insn(Imul,      "\tmul%t %r\n",              0)
-Insn(Idiv,      "\tdiv%t %r\n",              0)
-Insn(Ineg,      "\tneg%t %r\n",              0)
-Insn(Iand,      "\tand%t %r,%x\n",              0)
-Insn(Ior,       "\tor%t %r,%x\n",               0)
-Insn(Ixor,      "\txor%t %r,%x\n",              0)
-Insn(Inot,      "\tnot%t %x\n",              0)
-Insn(Ishl,      "\tsal%1t %r,%x\n",              0)
-Insn(Isar,      "\tshr%1t %r,%x\n",              0)
-Insn(Ishr,      "\tshr%1t %r,%x\n",              0)
+Insn(Iadd,      "\tadd%t %r,%x\n",              Use(.l={1,2}),			Def(.l={2}))
+Insn(Isub,      "\tsub%t %r,%x\n",              Use(.l={1,2}),			Def(.l={2}))
+Insn(Imul,      "\tmul%t %r\n",              	Use(.l={1},.r={Reax}),		Def(.r={Reax,Redx}))
+Insn(Idiv,      "\tdiv%t %r\n",              	Use(.l={1},.r={Reax,Redx}),	Def(.r={Reax,Redx}))
+Insn(Ineg,      "\tneg%t %r\n",              	Use(.l={1}),			Def(.l={1}))
+Insn(Iand,      "\tand%t %r,%x\n",              Use(.l={1,2}),			Def(.l={2}))
+Insn(Ior,       "\tor%t %r,%x\n",               Use(.l={1,2}),			Def(.l={2}))
+Insn(Ixor,      "\txor%t %r,%x\n",              Use(.l={1,2}),			Def(.l={2}))
+Insn(Inot,      "\tnot%t %x\n",              	Use(.l={1}),			Def(.l={1}))
+Insn(Ishl,      "\tsal%2t %r,%x\n",             Use(.l={1,2}),			Def(.l={2}))
+Insn(Isar,      "\tshr%2t %r,%x\n",             Use(.l={1,2}),			Def(.l={2}))
+Insn(Ishr,      "\tshr%2t %r,%x\n",             Use(.l={1,2}),			Def(.l={2}))
 
-Insn(Itest,     "\ttest%t %r,%r\n",             0)
-Insn(Icmp,      "\tcmp%t %r,%r\n",              0)
+Insn(Itest,     "\ttest%t %r,%r\n",             Use(.l={1,2}),			Def(.l={2}))
+Insn(Icmp,      "\tcmp%t %r,%r\n",              Use(.l={1,2}),			Def(.l={2}))
 
-Insn(Ipush,     "\tpush%t %r\n",                0)
-Insn(Ipop,      "\tpop%t %r\n",                 0)
+Insn(Ipush,     "\tpush%t %r\n",                Use(.l={1}),			Def())
+Insn(Ipop,      "\tpop%t %r\n",                 Use(.l={1}),			Def())
 
 /* branch instructions */
-Insn(Isetz,       "\tsetz %v\n",                0)
-Insn(Isetnz,      "\tsetnz %v\n",               0)
-Insn(Isetlt,      "\tsetlt %v\n",               0)
-Insn(Isetle,      "\tsetle %v\n",               0)
-Insn(Isetgt,      "\tsetgt %v\n",               0)
-Insn(Isetge,      "\tsetge %v\n",               0)
+Insn(Isetz,       "\tsetz %v\n",                Use(),	Def(.l={2}))
+Insn(Isetnz,      "\tsetnz %v\n",               Use(),	Def(.l={2}))
+Insn(Isetlt,      "\tsetlt %v\n",               Use(),	Def(.l={2}))
+Insn(Isetle,      "\tsetle %v\n",               Use(),	Def(.l={2}))
+Insn(Isetgt,      "\tsetgt %v\n",               Use(),	Def(.l={2}))
+Insn(Isetge,      "\tsetge %v\n",               Use(),	Def(.l={2}))
 
 /* branch instructions */
-Insn(Icall,     "\tcall %v\n",                  0)
-Insn(Ijmp,      "\tjmp %v\n",                   0)
-Insn(Ijz,       "\tjz %v\n",                    0)
-Insn(Ijnz,      "\tjnz %v\n",                   0)
-Insn(Ijl,       "\tjl %v\n",                    0)
-Insn(Ijle,      "\tjle %v\n",                   0)
-Insn(Ijg,       "\tjg %v\n",                    0)
-Insn(Ijge,      "\tjge %v\n",                   0)
-Insn(Iret,      "\tret\n",                      0)
+Insn(Icall,     "\tcall %v\n",                  Use(.l={1}), Def())
+Insn(Ijmp,      "\tjmp %v\n",                   Use(.l={1}), Def())
+Insn(Ijz,       "\tjz %v\n",                    Use(.l={1}), Def())
+Insn(Ijnz,      "\tjnz %v\n",                   Use(.l={1}), Def())
+Insn(Ijl,       "\tjl %v\n",                    Use(.l={1}), Def())
+Insn(Ijle,      "\tjle %v\n",                   Use(.l={1}), Def())
+Insn(Ijg,       "\tjg %v\n",                    Use(.l={1}), Def())
+Insn(Ijge,      "\tjge %v\n",                   Use(.l={1}), Def())
+Insn(Iret,      "\tret\n",                      Use(), Def())
 
 /* not really an insn... */
-Insn(Ilbl,      "%v:\n",                        0)
+Insn(Ilbl,      "%v:\n",                        Use(), Def())
--- a/8/isel.c
+++ b/8/isel.c
@@ -11,11 +11,12 @@
 #include <unistd.h>
 
 #include "parse.h"
+#include "opt.h"
 #include "asm.h"
 
 /* string tables */
 char *insnfmts[] = {
-#define Insn(val, fmt, attr) fmt,
+#define Insn(val, fmt, use, def) fmt,
 #include "insns.def"
 #undef Insn
 };
@@ -29,7 +30,7 @@
 };
 
 /* forward decls */
-void selexpr(Isel *s, Node *n);
+Loc *selexpr(Isel *s, Node *n);
 
 /* used to decide which operator is appropriate
  * for implementing various conditional operators */
@@ -47,9 +48,10 @@
     [Ole] = {Icmp, Ijle, Isetle}
 };
 
-static Loc loc(Isel *s, Node *n)
+
+static Loc *loc(Isel *s, Node *n)
 {
-    Loc l;
+    Loc *l;
     Node *v;
     size_t stkoff;
 
@@ -57,9 +59,9 @@
         case Ovar:
             if (hthas(s->locs, (void*)n->expr.did)) {
                 stkoff = (size_t)htget(s->locs, (void*)n->expr.did);
-                locmem(&l, -stkoff, Rebp, Rnone, ModeL);
+                l = locmem(-stkoff, locphysreg(Rebp), NULL, ModeL);
             } else if (hthas(s->globls, (void*)n->expr.did)) {
-                locstrlbl(&l, htget(s->globls, (void*)n->expr.did));
+                l = locstrlbl(htget(s->globls, (void*)n->expr.did));
             } else {
                 die("%s (id=%ld) not found", namestr(n->expr.args[0]), n->expr.did);
             }
@@ -67,9 +69,9 @@
         case Olit:
             v = n->expr.args[0];
             switch (v->lit.littype) {
-                case Lchr:      loclit(&l, v->lit.chrval); break;
-                case Lbool:     loclit(&l, v->lit.boolval); break;
-                case Lint:      loclit(&l, v->lit.intval); break;
+                case Lchr:      l = loclit(v->lit.chrval); break;
+                case Lbool:     l = loclit(v->lit.boolval); break;
+                case Lint:      l = loclit(v->lit.intval); break;
                 default:
                                 die("Literal type %s should be blob", litstr(v->lit.littype));
             }
@@ -81,26 +83,13 @@
     return l;
 }
 
-static Loc getloc(Isel *s, Node *n)
-{
-    if (s->locmap[n->nid].type != Locnone)
-        return s->locmap[n->nid];
-    else if (exprop(n) == Ndecl)
-        return loc(s, n);
-    else
-        die("Unevaluated node %s", opstr(exprop(n)));
-    return (Loc){Locnone,};
-}
-
 static Mode mode(Node *n)
 {
     return ModeL;
 }
 
-static Loc coreg(Loc r, Mode m)
+static Loc *coreg(Reg r, Mode m)
 {
-    Loc l;
-
     Reg crtab[][Nmode + 1] = {
         [Ral] = {Rnone, Ral, Rax, Reax},
         [Rcl] = {Rnone, Rcl, Rcx, Recx},
@@ -121,10 +110,9 @@
         [Resi] = {Rnone, Rnone, Rsi, Resi},
         [Redi] = {Rnone, Rnone, Rdi, Redi},
     };
-    if (r.type != Locreg)
-        die("Non-reg passed to coreg()");
-    locreg(&l, crtab[r.reg][m]);
-    return l;
+
+    assert(crtab[r][m] != Rnone);
+    return locphysreg(crtab[r][m]);
 }
 
 static Insn *mkinsnv(AsmOp op, va_list ap)
@@ -137,12 +125,12 @@
     i = malloc(sizeof(Insn));
     i->op = op;
     while ((l = va_arg(ap, Loc*)) != NULL)
-        i->args[n++] = *l;
-    i->narg = n;
+        i->args[n++] = l;
+    i->nargs = n;
     return i;
 }
 
-void g(Isel *s, AsmOp op, ...)
+static void g(Isel *s, AsmOp op, ...)
 {
     va_list ap;
     Insn *i;
@@ -150,48 +138,48 @@
     va_start(ap, op);
     i = mkinsnv(op, ap);
     va_end(ap);
-    lappend(&s->il, &s->ni, i);
+    lappend(&s->curbb->il, &s->curbb->ni, i);
 }
 
 static void load(Isel *s, Loc *a, Loc *b)
 {
-    Loc l;
+    Loc *l;
     assert(b->type == Locreg);
     if (a->type == Locreg)
-        locmem(&l, 0, b->reg, Rnone, a->mode);
+        locmem(0, b, Rnone, a->mode);
     else
-        l = *a;
-    g(s, Imov, &l, b, NULL);
+        l = a;
+    g(s, Imov, l, b, NULL);
 }
 
 static void stor(Isel *s, Loc *a, Loc *b)
 {
-    Loc l;
+    Loc *l;
 
     assert(a->type == Locreg || a->type == Loclit);
     if (b->type == Locreg)
-        locmem(&l, 0, b->reg, Rnone, b->mode);
+        locmem(0, b, Rnone, b->mode);
     else
-        l = *b;
-    g(s, Imov, a, &l, NULL);
+        l = b;
+    g(s, Imov, a, l, NULL);
 }
 
 /* ensures that a location is within a reg */
-static Loc inr(Isel *s, Loc a)
+static Loc *inr(Isel *s, Loc *a)
 {
-    Loc r;
+    Loc *r;
 
-    if (a.type == Locreg)
+    if (a->type == Locreg)
         return a;
-    r = getreg(s, a.mode);
-    load(s, &a, &r);
+    r = locreg(a->mode);
+    load(s, a, r);
     return r;
 }
 
 /* ensures that a location is within a reg or an imm */
-static Loc inri(Isel *s, Loc a)
+static Loc *inri(Isel *s, Loc *a)
 {
-    if (a.type == Locreg || a.type == Loclit)
+    if (a->type == Locreg || a->type == Loclit)
         return a;
     else
         return inr(s, a);
@@ -198,9 +186,9 @@
 }
 
 /* ensures that a location is within a reg or an imm */
-static Loc inrm(Isel *s, Loc a)
+static Loc *inrm(Isel *s, Loc *a)
 {
-    if (a.type == Locreg || a.type == Locmem)
+    if (a->type == Locreg || a->type == Locmem)
         return a;
     else
         return inr(s, a);
@@ -217,8 +205,8 @@
  * directly */
 static void selcjmp(Isel *s, Node *n, Node **args)
 {
-    Loc a, b;
-    Loc l1, l2;
+    Loc *a, *b;
+    Loc *l1, *l2;
     AsmOp cond, jmp;
 
     cond = reloptab[exprop(args[0])].test;
@@ -226,37 +214,33 @@
     /* if we have a cond, we're knocking off the redundant test,
      * and want to eval the children */
     if (cond) {
-        selexpr(s, args[0]->expr.args[1]);
-        selexpr(s, args[0]->expr.args[0]);
-        a = getloc(s, args[0]->expr.args[1]);
-        b = inr(s, getloc(s, args[0]->expr.args[0]));
+        a = selexpr(s, args[0]->expr.args[1]);
+        b = selexpr(s, args[0]->expr.args[0]);
+        b = inr(s, b);
     } else {
         cond = Itest;
         jmp = Ijnz;
-        selexpr(s, args[0]);
-        a = inr(s, getloc(s, args[0])); /* cond */
+        a = inr(s, selexpr(s, args[0])); /* cond */
         b = a;
     }
 
     /* the jump targets will always be evaluated the same way */
-    loclbl(&l1, args[1]); /* if true */
-    loclbl(&l2, args[2]); /* if false */
+    l1 = loclbl(args[1]); /* if true */
+    l2 = loclbl(args[2]); /* if false */
 
-    g(s, cond, &a, &b, NULL);
-    g(s, jmp, &l1, NULL);
-    g(s, Ijmp, &l2, NULL);
+    g(s, cond, a, b, NULL);
+    g(s, jmp, l1, NULL);
+    g(s, Ijmp, l2, NULL);
 }
 
-static Loc binop(Isel *s, AsmOp op, Node *x, Node *y)
+static Loc *binop(Isel *s, AsmOp op, Node *x, Node *y)
 {
-    Loc a, b;
+    Loc *a, *b;
 
-    selexpr(s, x);
-    selexpr(s, y);
-    a = inr(s, getloc(s, x));
-    b = getloc(s, y);
-    g(s, op, &b, &a, NULL);
-    freeloc(s, b);
+    a = selexpr(s, x);
+    b = selexpr(s, y);
+    a = inr(s, a);
+    g(s, op, b, a, NULL);
     return a;
 }
 
@@ -289,50 +273,48 @@
     return 1;
 }
 
-static Loc memloc(Isel *s, Node *e, Mode m)
+static Loc *memloc(Isel *s, Node *e, Mode m)
 {
     Node **args;
-    Loc l, b, o; /* location, base, offset */
+    Loc *l, *b, *o; /* location, base, offset */
     int scale;
 
     scale = 0;
+    l = NULL;
     if (exprop(e) == Oadd) {
         args = e->expr.args;
-        selexpr(s, args[0]);
-        if (ismergablemul(args[1], &scale)) {
-            selexpr(s, args[1]->expr.args[0]);
-            o = getloc(s, args[1]->expr.args[0]);
-        } else {
-            selexpr(s, args[1]);
-            o = getloc(s, args[1]);
-        }
+        b = selexpr(s, args[0]);
+        if (ismergablemul(args[1], &scale))
+            o = selexpr(s, args[1]->expr.args[0]);
+        else
+            o = selexpr(s, args[1]);
 
-        b = inr(s, getloc(s, args[0]));
-        if (o.type == Loclit) {
-            locmem(&l, o.lit, b.reg, Rnone, m);
-        } else if (o.type == Locreg) {
+        if (b->type != Locreg)
             b = inr(s, b);
-            locmems(&l, 0, b.reg, o.reg, scale, m);
+        if (o->type == Loclit) {
+            l = locmem(o->lit, b, Rnone, m);
+        } else if (o->type == Locreg) {
+            b = inr(s, b);
+            l = locmems(0, b, o, scale, m);
         }
     } else {
-        selexpr(s, e);
-        l = getloc(s, e);
-        if (l.type == Locreg)
-            locmem(&l, 0, l.reg, Rnone, m);
+        l = selexpr(s, e);
+        if (l->type == Locreg)
+            l = locmem(0, l, Rnone, m);
     }
     return l;
 }
 
-static Loc gencall(Isel *s, Node *n)
+static Loc *gencall(Isel *s, Node *n)
 {
     int argsz, argoff;
     size_t i;
-    Loc eax, esp;       /* hard-coded registers */
-    Loc stkbump;        /* calculated stack offset */
-    Loc dst, arg, fn;   /* values we reduced */
+    Loc *eax, *esp;       /* hard-coded registers */
+    Loc *stkbump;        /* calculated stack offset */
+    Loc *dst, *arg, *fn;   /* values we reduced */
 
-    locreg(&esp, Resp);
-    locreg(&eax, Reax);
+    esp = locphysreg(Resp);
+    eax = locphysreg(Reax);
     argsz = 0;
     /* Have to calculate the amount to bump the stack
      * pointer by in one pass first, otherwise if we push
@@ -342,69 +324,64 @@
      * We skip the first operand, since it's the function itself */
     for (i = 1; i < n->expr.nargs; i++)
         argsz += size(n->expr.args[i]);
-    loclit(&stkbump, argsz);
+    stkbump = loclit(argsz);
     if (argsz)
-        g(s, Isub, &stkbump, &esp, NULL);
+        g(s, Isub, stkbump, esp, NULL);
 
     /* Now, we can evaluate the arguments */
     argoff = 0;
     for (i = 1; i < n->expr.nargs; i++) {
-        selexpr(s, n->expr.args[i]);
-        arg = inri(s, getloc(s, n->expr.args[i]));
-        locmem(&dst, argoff, Resp, Rnone, arg.mode);
-        stor(s, &arg, &dst);
+        arg = selexpr(s, n->expr.args[i]);
+        arg = inri(s, arg);
+        dst = locmem(argoff, esp, NULL, arg->mode);
+        stor(s, arg, dst);
         argsz += size(n->expr.args[i]);
-        freeloc(s, arg);
-        freeloc(s, dst);
     }
-    selexpr(s, n->expr.args[0]);
-    fn = getloc(s, n->expr.args[0]);
-    g(s, Icall, &fn, NULL);
-    claimreg(s, Reax);
+    fn = selexpr(s, n->expr.args[0]);
+    g(s, Icall, fn, NULL);
     if (argsz)
-        g(s, Iadd, &stkbump, &esp, NULL);
+        g(s, Iadd, stkbump, esp, NULL);
     return eax;
 }
 
-static void blit(Isel *s, Loc a, Loc b, int sz)
+static void blit(Isel *s, Loc *a, Loc *b, int sz)
 {
     int i;
-    Reg sp, dp; /* pointers to src, dst */
-    Loc tmp, src, dst; /* source memory, dst memory */
+    Loc *sp, *dp; /* pointers to src, dst */
+    Loc *tmp, *src, *dst; /* source memory, dst memory */
 
-    sp = inr(s, a).reg;
-    dp = inr(s, b).reg;
+    sp = inr(s, a);
+    dp = inr(s, b);
 
     /* Slightly funny loop condition: We might have trailing bytes
      * that we can't blit word-wise. */
-    tmp = getreg(s, ModeL);
+    tmp = locreg(ModeL);
     for (i = 0; i + 4 <= sz; i+= 4) {
-        locmem(&src, i, sp, Rnone, ModeL);
-        locmem(&dst, i, dp, Rnone, ModeL);
-        g(s, Imov, &src, &tmp, NULL);
-        g(s, Imov, &tmp, &dst, NULL);
+        src = locmem(i, sp, NULL, ModeL);
+        dst = locmem(i, dp, NULL, ModeL);
+        g(s, Imov, src, tmp, NULL);
+        g(s, Imov, tmp, dst, NULL);
     }
     /* now, the trailing bytes */
-    tmp = coreg(tmp, ModeB);
+    tmp = locreg(ModeB);
     for (; i < sz; i++) {
-        locmem(&src, i, sp, Rnone, ModeB);
-        locmem(&dst, i, dp, Rnone, ModeB);
-        g(s, Imov, &src, &tmp, NULL);
-        g(s, Imov, &tmp, &dst, NULL);
+        src = locmem(i, sp, NULL, ModeB);
+        dst = locmem(i, dp, NULL, ModeB);
+        g(s, Imov, src, tmp, NULL);
+        g(s, Imov, tmp, dst, NULL);
     }
 }
 
-void selexpr(Isel *s, Node *n)
+Loc *selexpr(Isel *s, Node *n)
 {
-    Loc a, b, c, r;
-    Loc eax, edx, cl; /* x86 wanst some hard-coded regs */
+    Loc *a, *b, *c, *d, *r;
+    Loc *eax, *edx, *cl; /* x86 wanst some hard-coded regs */
     Node **args;
 
     args = n->expr.args;
-    r = (Loc){Locnone, };
-    locreg(&eax, Reax);
-    locreg(&edx, Redx);
-    locreg(&cl, Rcl);
+    eax = locphysreg(Reax);
+    edx = locphysreg(Redx);
+    cl = locphysreg(Rcl);
     switch (exprop(n)) {
         case Oadd:      r = binop(s, Iadd, args[0], args[1]); break;
         case Osub:      r = binop(s, Isub, args[0], args[1]); break;
@@ -414,122 +391,109 @@
 
         case Omul:
             /* these get clobbered by the mul insn */
-            selexpr(s, args[0]);
-            selexpr(s, args[1]);
-            claimreg(s, Reax);
-            claimreg(s, Redx);
-            a = getloc(s, args[0]);
-            b = inr(s, getloc(s, args[1]));
-            c = coreg(eax, mode(n));
-            g(s, Imov, &a, &c, NULL);
-            g(s, Imul, &b, NULL);
-            freereg(s, Redx);
-            r = eax;
+            a = selexpr(s, args[0]);
+            b = selexpr(s, args[1]);
+            b = inr(s, b);
+            c = coreg(Reax, mode(n));
+            r = locreg(a->mode);
+            g(s, Imov, a, c, NULL);
+            g(s, Imul, b, NULL);
+            g(s, Imov, eax, r, NULL);
             break;
         case Odiv:
         case Omod:
             /* these get clobbered by the div insn */
-            selexpr(s, args[0]);
-            selexpr(s, args[1]);
-            claimreg(s, Reax);
-            claimreg(s, Redx);
-            a = getloc(s, args[0]);
-            b = inr(s, getloc(s, args[1]));
-            c = coreg(eax, mode(n));
-            g(s, Imov, &a, &c, NULL);
-            g(s, Ixor, &edx, &edx, NULL);
-            g(s, Idiv, &b, NULL);
-            freereg(s, Redx);
+            a = selexpr(s, args[0]);
+            b = selexpr(s, args[1]);
+            b = inr(s, b);
+            c = coreg(Reax, mode(n));
+            r = locreg(a->mode);
+            g(s, Imov, a, c, NULL);
+            g(s, Ixor, edx, edx, NULL);
+            g(s, Idiv, b, NULL);
             if (exprop(n) == Odiv)
-                r = eax;
+                d = eax;
             else
-                r = edx;
+                d = edx;
+            g(s, Imov, d, r, NULL);
             break;
         case Oneg:
-            selexpr(s, args[0]);
-            r = inr(s, getloc(s, args[0]));
-            g(s, Ineg, &r, NULL);
+            r = selexpr(s, args[0]);
+            r = inr(s, r);
+            g(s, Ineg, r, NULL);
             break;
 
         case Obsl:
         case Obsr:
-            selexpr(s, args[0]);
-            selexpr(s, args[1]);
-            a = inr(s, getloc(s, args[0]));
-            b = getloc(s, args[1]);
-            /* FIXME: we can shift by constants */
-            claimreg(s, Rcl); /* shift requires cl as it's arg. stupid. */
-            c = coreg(cl, b.mode);
-            g(s, Imov, &b, &c, NULL);
+            a = selexpr(s, args[0]);
+            a = inr(s, a);
+            b = selexpr(s, args[1]);
+            c = coreg(Rcl, b->mode);
+            g(s, Imov, b, c, NULL);
             if (exprop(n) == Obsr) {
                 if (istysigned(n->expr.type))
-                    g(s, Isar, &cl, &a, NULL);
+                    g(s, Isar, cl, a, NULL);
                 else
-                    g(s, Ishr, &cl, &a, NULL);
+                    g(s, Ishr, cl, a, NULL);
             } else {
-                g(s, Ishl, &cl, &a, NULL);
+                g(s, Ishl, cl, a, NULL);
             }
-            freeloc(s, cl);
-            freeloc(s, b);
             r = a;
             break;
         case Obnot:
-            selexpr(s, args[0]);
-            r = inrm(s, getloc(s, args[0]));
-            g(s, Inot, &r, NULL);
+            r = selexpr(s, args[0]);
+            r = inrm(s, r);
+            g(s, Inot, r, NULL);
             break;
 
         case Oderef:
-            selexpr(s, args[0]);
-            a = inr(s, getloc(s, args[0]));
-            r = getreg(s, a.mode);
-            locmem(&c, 0, a.reg, Rnone, a.mode);
-            g(s, Imov, &c, &r, NULL);
+            a = selexpr(s, args[0]);
+            a = inr(s, a);
+            r = locreg(a->mode);
+            c = locmem(0, a, Rnone, a->mode);
+            g(s, Imov, c, r, NULL);
             break;
 
         case Oaddr:
-            selexpr(s, args[0]);
-            a = getloc(s, args[0]);
-            r = getreg(s, ModeL);
-            g(s, Ilea, &a, &r, NULL);
+            a = selexpr(s, args[0]);
+            r = locreg(ModeL);
+            g(s, Ilea, a, r, NULL);
             break;
 
         case Olnot:
-            selexpr(s, args[0]);
-            a = getloc(s, args[0]);
-            b = getreg(s, ModeB);
-            r = coreg(b, mode(n));
-            g(s, reloptab[exprop(n)].test, &a, &a, NULL);
-            g(s, reloptab[exprop(n)].getflag, &b, NULL);
-            g(s, Imovz, &b, &r, NULL);
+            a = selexpr(s, args[0]);
+            b = locreg(ModeB);
+            r = locreg(mode(n));
+            g(s, reloptab[exprop(n)].test, a, a, NULL);
+            g(s, reloptab[exprop(n)].getflag, b, NULL);
+            g(s, Imovz, b, r, NULL);
             break;
 
         case Oeq: case One: case Ogt: case Oge: case Olt: case Ole:
-            selexpr(s, args[0]);
-            selexpr(s, args[1]);
-            a = getloc(s, args[0]);
-            b = inr(s, getloc(s, args[1]));
-            c = getreg(s, ModeB);
-            r = coreg(c, mode(n));
-            g(s, reloptab[exprop(n)].test, &a, &b, NULL);
-            g(s, reloptab[exprop(n)].getflag, &c, NULL);
-            g(s, Imovz, &c, &r, NULL);
-            break;
+            a = selexpr(s, args[0]);
+            b = selexpr(s, args[1]);
+            b = inr(s, b);
+            c = locreg(ModeB);
+            r = locreg(mode(n));
+            g(s, reloptab[exprop(n)].test, a, b, NULL);
+            g(s, reloptab[exprop(n)].getflag, c, NULL);
+            g(s, Imovz, c, r, NULL);
+            return r;
 
         case Oasn:  /* relabel */
             die("Unimplemented op %s", opstr(exprop(n)));
             break;
         case Ostor: /* reg -> mem */
-            selexpr(s, args[1]);
+            b = selexpr(s, args[1]);
             a = memloc(s, args[0], mode(n));
-            b = inri(s, getloc(s, args[1]));
-            g(s, Imov, &b, &a, NULL);
+            b = inri(s, b);
+            g(s, Imov, b, a, NULL);
             r = b;
             break;
         case Oload: /* mem -> reg */
             b = memloc(s, args[0], mode(n));
-            r = getreg(s, mode(n));
-            g(s, Imov, &b, &r, NULL);
+            r = locreg(mode(n));
+            g(s, Imov, b, r, NULL);
             break;
 
         case Ocall:
@@ -537,7 +501,7 @@
             break;
         case Ocast: die("Unimplemented op %s", opstr(exprop(n))); break;
         case Ojmp:
-            g(s, Ijmp, loclbl(&a, args[0]), NULL);
+            g(s, Ijmp, a = loclbl(args[0]), NULL);
             break;
         case Ocjmp:
             selcjmp(s, n, args);
@@ -548,25 +512,23 @@
             r = loc(s, n);
             break;
         case Olbl:
-            loclbl(&r, args[0]);
+            r = loclbl(args[0]);
             break;
         case Oblit:
-            selexpr(s, args[0]);
-            selexpr(s, args[1]);
-            b = getloc(s, args[0]);
-            a = getloc(s, args[1]);
+            b = selexpr(s, args[0]);
+            a = selexpr(s, args[1]);
             blit(s, a, b, args[2]->expr.args[0]->lit.intval);
             r = b;
             break;
         case Oslbase:
-            selexpr(s, args[0]);
-            a = inr(s, getloc(s, args[0]));
-            locmem(&r, 0, a.reg, Rnone, ModeL);
+            a = selexpr(s, args[0]);
+            a = inr(s, a);
+            r = locmem(0, a, Rnone, ModeL);
             break;
         case Osllen:
-            selexpr(s, args[0]);
-            a = inr(s, getloc(s, args[0]));
-            locmem(&r, 4, a.reg, Rnone, ModeL);
+            a = selexpr(s, args[0]);
+            a = inr(s, a);
+            r = locmem(4, a, Rnone, ModeL);
             break;
 
         /* These operators should never show up in the reduced trees,
@@ -581,7 +543,7 @@
             die("Should not see %s in isel", opstr(exprop(n)));
             break;
     }
-    in(s, n, r);
+    return r;
 }
 
 void locprint(FILE *fd, Loc *l)
@@ -592,13 +554,10 @@
             fprintf(fd, "%s", l->lbl);
             break;
         case Locreg:
-            fprintf(fd, "%s", regnames[l->reg]);
-            break;
-        case Locpseu:
-            if (debug)
-              fprintf(fd, "%c%lu", modenames[l->mode], l->pseudo);
+            if (l->reg.colour == Rnone)
+                fprintf(fd, "%%P.%ld", l->reg.id);
             else
-              die("Trying to print pseudoreg %lu", l->pseudo);
+                fprintf(fd, "%s", regnames[l->reg.colour]);
             break;
         case Locmem:
         case Locmeml:
@@ -609,10 +568,12 @@
                 if (l->mem.lbldisp)
                     fprintf(fd, "%s", l->mem.lbldisp);
             }
-            if (l->mem.base)
-                fprintf(fd, "(%s", regnames[l->mem.base]);
-            if (l->mem.idx)
-                fprintf(fd, ",%s", regnames[l->mem.idx]);
+            fprintf(fd, "(");
+            locprint(fd, l->mem.base);
+            if (l->mem.idx) {
+                fprintf(fd, ",");
+                locprint(fd, l->mem.idx);
+            }
             if (l->mem.scale)
                 fprintf(fd, ",%d", l->mem.scale);
             if (l->mem.base)
@@ -651,17 +612,19 @@
             case 'l':
             case 'x':
             case 'v':
-                locprint(fd, &insn->args[i]);
+                locprint(fd, insn->args[i]);
                 i++;
                 break;
             case 't':
                 modeidx = 0;
             default:
+                /* the  asm description uses 1-based indexing, so that 0
+                 * can be used as a sentinel. */
                 if (isdigit(*p))
-                    modeidx = strtol(p, &p, 10);
+                    modeidx = strtol(p, &p, 10) - 1;
 
                 if (*p == 't')
-                    fputc(modenames[insn->args[modeidx].mode], fd);
+                    fputc(modenames[insn->args[modeidx]->mode], fd);
                 else
                     die("Invalid %%-specifier '%c'", *p);
                 break;
@@ -673,11 +636,11 @@
 
 static void isel(Isel *s, Node *n)
 {
-    Loc lbl;
+    Loc *lbl;
 
     switch (n->type) {
         case Nlbl:
-            g(s, Ilbl, loclbl(&lbl, n), NULL);
+            g(s, Ilbl, lbl = loclbl(n), NULL);
             break;
         case Nexpr:
             selexpr(s, n);
@@ -690,47 +653,66 @@
     }
 }
 
-static void prologue(Isel *s)
+static void prologue(Isel *s, size_t sz)
 {
-    Loc esp;
-    Loc ebp;
+    Loc *esp;
+    Loc *ebp;
+    Loc *stksz;
 
-    locreg(&esp, Resp);
-    locreg(&ebp, Rebp);
-    loclit(&s->stkszloc, s->stksz);
-    g(s, Ipush, &ebp, NULL);
-    g(s, Imov, &esp, &ebp, NULL);
-    g(s, Isub, &s->stkszloc, &esp, NULL);
+    esp = locphysreg(Resp);
+    ebp = locphysreg(Rebp);
+    stksz = loclit(sz);
+    g(s, Ipush, ebp, NULL);
+    g(s, Imov, esp, ebp, NULL);
+    g(s, Isub, stksz, esp, NULL);
+    s->stksz = stksz; /* need to update if we spill */
 }
 
 static void epilogue(Isel *s)
 {
-    Loc esp, ebp, eax;
-    Loc ret;
+    Loc *esp, *ebp, *eax;
+    Loc *ret;
 
-    locreg(&esp, Resp);
-    locreg(&ebp, Rebp);
-    locreg(&eax, Reax);
+    esp = locphysreg(Resp);
+    ebp = locphysreg(Rebp);
+    eax = locphysreg(Reax);
     if (s->ret) {
         ret = loc(s, s->ret);
-        g(s, Imov, &ret, &eax, NULL);
+        g(s, Imov, ret, eax, NULL);
     }
-    g(s, Imov, &ebp, &esp, NULL);
-    g(s, Ipop, &ebp, NULL);
+    g(s, Imov, ebp, esp, NULL);
+    g(s, Ipop, ebp, NULL);
     g(s, Iret, NULL);
 }
 
-static void writeasm(Func *fn, Isel *is, FILE *fd)
+static void writeasm(Func *fn, Isel *s, FILE *fd)
 {
-    size_t i;
+    size_t i, j;
 
     if (fn->isglobl)
         fprintf(fd, ".globl %s\n", fn->name);
     fprintf(fd, "%s:\n", fn->name);
-    for (i = 0; i < is->ni; i++)
-        iprintf(fd, is->il[i]);
+    for (j = 0; j < s->cfg->nbb; j++) {
+        for (i = 0; i < s->bb[j]->nlbls; i++)
+            fprintf(fd, "%s:\n", s->bb[j]->lbls[i]);
+        for (i = 0; i < s->bb[j]->ni; i++)
+            iprintf(fd, s->bb[j]->il[i]);
+    }
 }
 
+static Asmbb *mkasmbb(Bb *bb)
+{
+    Asmbb *as;
+
+    as = zalloc(sizeof(Asmbb));
+    as->id = bb->id;
+    as->pred = bsdup(bb->pred);
+    as->succ = bsdup(bb->succ);
+    as->lbls = memdup(bb->lbls, bb->nlbls*sizeof(char*));
+    as->nlbls = bb->nlbls;
+    return as;
+}
+
 /* genasm requires all nodes in 'nl' to map cleanly to operations that are
  * natively supported, as promised in the output of reduce().  No 64-bit
  * operations on x32, no structures, and so on. */
@@ -737,21 +719,28 @@
 void genasm(FILE *fd, Func *fn, Htab *globls)
 {
     struct Isel is = {0,};
-    size_t i;
+    size_t i, j;
 
     is.locs = fn->locs;
     is.globls = globls;
     is.ret = fn->ret;
-    is.stksz = fn->stksz;
-    is.locmap = zalloc(maxnid * sizeof(Loc));
+    is.cfg = fn->cfg;
 
-    prologue(&is);
-    for (i = 0; i < fn->nn; i++) {
-        bzero(is.rtaken, sizeof is.rtaken);
-        isel(&is, fn->nl[i]);
+    for (i = 0; i < fn->cfg->nbb; i++)
+        lappend(&is.bb, &is.nbb, mkasmbb(fn->cfg->bb[i]));
+
+    is.curbb = is.bb[0];
+    prologue(&is, fn->stksz);
+    for (j = 0; j < fn->cfg->nbb; j++) {
+        is.curbb = is.bb[j];
+        for (i = 0; i < fn->cfg->bb[j]->nnl; i++) {
+            isel(&is, fn->cfg->bb[j]->nl[i]);
+        }
     }
+    is.curbb = is.bb[is.nbb - 1];
     epilogue(&is);
 
+    regalloc(&is);
     if (debug)
       writeasm(fn, &is, stdout);
 
--- a/8/main.c
+++ b/8/main.c
@@ -10,8 +10,8 @@
 #include <unistd.h>
 
 #include "parse.h"
-#include "asm.h"
 #include "opt.h"
+#include "asm.h"
 
 Node *file;
 static char *outfile;
--- /dev/null
+++ b/8/ra.c
@@ -1,0 +1,741 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+
+#include "parse.h"
+#include "opt.h"
+#include "asm.h"
+
+#define Sizetbits (CHAR_BIT*sizeof(size_t)) /* used in graph reprs */
+
+typedef struct Usage Usage;
+struct Usage {
+    int l[Maxarg + 1];
+    int r[Maxarg + 1];
+};
+
+Usage usetab[] = {
+#define Use(...) {__VA_ARGS__}
+#define Insn(i, fmt, use, def) use,
+#include "insns.def"
+#undef Insn
+#undef Use
+};
+
+Usage deftab[] = {
+#define Def(...) {__VA_ARGS__}
+#define Insn(i, fmt, use, def) def,
+#include "insns.def"
+#undef Insn
+#undef Def
+};
+
+Reg regmap[6][Nmode] = {
+    [0] = {Rnone, Ral, Rax, Reax},
+    [1] = {Rnone, Rcl, Rcx, Recx},
+    [2] = {Rnone, Rdl, Rdx, Redx},
+    [3] = {Rnone, Rbl, Rbx, Rebx},
+    [4] = {Rnone, Rnone, Rnone, Resp},
+    [5] = {Rnone, Rnone, Rnone, Rebp},
+};
+
+int colourmap[Nreg] = {
+    /* byte */
+    [Ral] = 0,
+    [Rcl] = 1,
+    [Rdl] = 2,
+    [Rbl] = 3,
+
+    /* word */
+    [Rax] = 0,
+    [Rcx] = 1,
+    [Rdx] = 2,
+    [Rbx] = 3,
+
+    /* dword */
+    [Reax] = 0,
+    [Recx] = 1,
+    [Redx] = 2,
+    [Rebx] = 3,
+    [Resp] = 4,
+    [Rebp] = 5,
+};
+
+/* %esp, %ebp are not in the allocatable pool */
+static int isfixreg(Loc *l)
+{
+    if (l->reg.colour == Resp)
+	return 1;
+    if (l->reg.colour == Rebp)
+	return 1;
+    return 0;
+}
+
+static size_t uses(Insn *insn, long *u)
+{
+    size_t i, j;
+    int k;
+    Loc *m;
+
+    j = 0;
+    /* Add all the registers used and defined. Duplicates
+     * in this list are fine, since they're being added to
+     * a set anyways */
+    for (i = 0; i < Maxarg; i++) {
+	if (!usetab[insn->op].l[i])
+	    break;
+	k = usetab[insn->op].l[i] - 1;
+	/* non-registers are handled later */
+	if (insn->args[k]->type == Locreg)
+	    if (!isfixreg(insn->args[k]))
+		u[j++] = insn->args[k]->reg.id;
+    }
+    /* some insns don't reflect their defs in the args.
+     * These are explictly listed in the insn description */
+    for (i = 0; i < Maxarg; i++) {
+	if (!usetab[insn->op].r[i])
+	    break;
+	/* not a leak; physical registers get memoized */
+	u[j++] = locphysreg(usetab[insn->op].r[i])->reg.id;
+    }
+    /* If the registers are in an address calculation,
+     * they're used no matter what. */
+    for (i = 0; i < insn->nargs; i++) {
+	m = insn->args[i];
+	if (m->type != Locmem && m->type != Locmeml)
+	    continue;
+	if (!isfixreg(m->mem.base))
+	    u[j++] = m->mem.base->reg.id;
+	if (m->mem.idx)
+	    if (!isfixreg(m->mem.base))
+		u[j++] = m->mem.idx->reg.id;
+    }
+    return j;
+}
+
+static size_t defs(Insn *insn, long *d)
+{
+    size_t i, j;
+    int k;
+
+    j = 0;
+    /* Add all the registers dsed and defined. Duplicates
+     * in this list are fine, since they're being added to
+     * a set anyways */
+    for (i = 0; i < Maxarg; i++) {
+	if (!deftab[insn->op].l[i])
+	    break;
+	k = deftab[insn->op].l[i] - 1;
+	if (insn->args[k]->type == Locreg)
+	    if (!isfixreg(insn->args[k]))
+		d[j++] = insn->args[k]->reg.id;
+    }
+    /* some insns don't reflect their defs in the args.
+     * These are explictly listed in the insn description */
+    for (i = 0; i < Maxarg; i++) {
+	if (!deftab[insn->op].r[i])
+	    break;
+	/* not a leak; physical registers get memoized */
+	d[j++] = locphysreg(deftab[insn->op].r[i])->reg.id;
+    }
+    return j;
+}
+
+static void udcalc(Asmbb *bb)
+{
+    /* up to 2 registers per memloc, so
+     * 2*Maxarg is the maximum number of
+     * uses or defs we can see */
+    long   u[2*Maxarg], d[2*Maxarg];
+    size_t nu, nd;
+    size_t i, j;
+
+    bb->use = bsclear(bb->use);
+    bb->def = bsclear(bb->def);
+    for (i = 0; i < bb->ni; i++) {
+	nu = uses(bb->il[i], u);
+	nd = defs(bb->il[i], d);
+	for (j = 0; j < nu; j++)
+	    if (!bshas(bb->def, u[j]))
+		bsput(bb->use, u[j]);
+	for (j = 0; j < nd; j++)
+	    bsput(bb->def, d[j]);
+    }
+}
+
+static void liveness(Isel *s)
+{
+    Bitset *old;
+    Asmbb **bb;
+    size_t nbb;
+    size_t i, j;
+    int changed;
+
+    bb = s->bb;
+    nbb = s->nbb;
+    for (i = 0; i < nbb; i++) {
+	udcalc(s->bb[i]);
+	bb[i]->livein = bsclear(bb[i]->livein);
+	bb[i]->liveout = bsclear(bb[i]->liveout);
+    }
+
+    changed = 1;
+    while (changed) {
+	changed = 0;
+	for (i = 0; i < nbb; i++) {
+	    old = bsdup(bb[i]->liveout);
+	    /* liveout[b] = U(s in succ) livein[s] */
+	    for (j = 0; bsiter(bb[i]->succ, &j); j++)
+		bsunion(bb[i]->liveout, bb[j]->livein);
+	    /* livein[b] = use[b] U (out[b] \ def[b]) */
+	    bb[i]->livein = bsclear(bb[i]->livein);
+	    bsunion(bb[i]->livein, bb[i]->liveout);
+	    bsdiff(bb[i]->livein, bb[i]->def);
+	    bsunion(bb[i]->livein, bb[i]->use);
+	    if (!changed)
+		changed = !bseq(old, bb[i]->liveout);
+	}
+    }
+}
+
+/* we're only interested in register->register moves */
+static int ismove(Insn *i)
+{
+    if (i->op != Imov)
+	return 0;
+    return i->args[0]->type == Locreg && i->args[1]->type == Locreg;
+}
+
+/* static */ int gbhasedge(Isel *s, size_t u, size_t v)
+{
+    size_t i;
+    i = (maxregid * v) + u;
+    return (s->gbits[i/Sizetbits] & (1ULL <<(i % Sizetbits))) != 0;
+}
+
+static void gbputedge(Isel *s, size_t u, size_t v)
+{
+    size_t i, j;
+
+    i = (maxregid * u) + v;
+    j = (maxregid * v) + u;
+    s->gbits[i/Sizetbits] |= 1ULL <<(i % Sizetbits);
+    s->gbits[j/Sizetbits] |= 1ULL <<(j % Sizetbits);
+    assert(gbhasedge(s, u, v) && gbhasedge(s, v, u));
+}
+
+static void addedge(Isel *s, size_t u, size_t v)
+{
+    if (u == v || gbhasedge(s, u, v))
+	return;
+    gbputedge(s, u, v);
+    gbputedge(s, v, u);
+    if (!bshas(s->prepainted, u)) {
+	bsput(s->gadj[u], v);
+	s->degree[u]++;
+    }
+    if (!bshas(s->prepainted, v)) {
+	bsput(s->gadj[v], u);
+	s->degree[v]++;
+    }
+}
+
+void setup(Isel *s)
+{
+    Bitset **gadj;
+    size_t gchunks;
+    size_t i;
+
+    free(s->gbits);
+    gchunks = (maxregid*maxregid)/Sizetbits + 1;
+    s->gbits = zalloc(gchunks*sizeof(size_t));
+    /* fresh adj list repr. try to avoid reallocating all the bitsets */
+    gadj = zalloc(maxregid * sizeof(Bitset*));
+    if (s->gadj)
+	for (i = 0; i < maxregid; i++)
+	    gadj[i] = bsclear(s->gadj[i]);
+    else
+	for (i = 0; i < maxregid; i++)
+	    gadj[i] = mkbs();
+    free(s->gadj);
+    s->gadj = gadj;
+
+    s->spilled = bsclear(s->spilled);
+    s->coalesced = bsclear(s->coalesced);
+    /*
+    s->wlspill = bsclear(s->wlspill);
+    s->wlfreeze = bsclear(s->wlfreeze);
+    s->wlsimp = bsclear(s->wlsimp);
+    */
+
+    s->aliasmap = zalloc(maxregid * sizeof(size_t));
+    s->degree = zalloc(maxregid * sizeof(int));
+    s->rmoves = zalloc(maxregid * sizeof(Loc **));
+    s->nrmoves = zalloc(maxregid * sizeof(size_t));
+}
+
+static void build(Isel *s)
+{
+    long u[2*Maxarg], d[2*Maxarg];
+    size_t nu, nd;
+    size_t i, k;
+    ssize_t j;
+    Bitset *live;
+    Asmbb **bb;
+    size_t nbb;
+    Insn *insn;
+    size_t l;
+
+    /* set up convenience vars */
+    bb = s->bb;
+    nbb = s->nbb;
+
+    for (i = 0; i < nbb; i++) {
+	live = bsdup(bb[i]->liveout);
+	for (j = bb[i]->ni - 1; j >= 0; j--) {
+	    insn = bb[i]->il[j];
+	    nu = uses(insn, u);
+	    nd = defs(insn, d);
+
+	    /* moves get special treatment, since we don't want spurious
+	     * edges between the src and dest */
+	    if (ismove(insn)) {
+		/* live \= uses(i) */
+		for (k = 0; k < nu; k++)
+		    bsdel(live, u[k]);
+
+		for (k = 0; k < nu; k++)
+		    lappend(&s->rmoves[u[k]], &s->nrmoves[u[k]], insn);
+		for (k = 0; k < nd; k++)
+		    lappend(&s->rmoves[d[k]], &s->nrmoves[d[k]], insn);
+		lappend(&s->wlmove, &s->nwlmove, insn);
+	    }
+	    /* live = live U def(i) */
+	    for (k = 0; k < nd; k++)
+		bsput(live, d[k]);
+
+	    for (k = 0; k < nd; k++)
+		for (l = 0; bsiter(live, &l); l++)
+		    addedge(s, d[k], l);
+	    /* live = use(i) U (live \ def(i)) */
+	    for (k = 0; k < nd; k++)
+		bsdel(live, d[k]);
+	    for (k = 0; k < nu; k++)
+		bsput(live, u[k]);
+	}
+    }
+}
+
+Bitset *adjacent(Isel *s, regid n)
+{
+    Bitset *r;
+    size_t i;
+
+    r = bsdup(s->gadj[n]);
+    for (i = 0; i < s->nselstk; i++)
+	bsdel(r, s->selstk[i]->reg.id);
+    bsdiff(r, s->coalesced);
+    return r;
+}
+
+size_t nodemoves(Isel *s, regid n, Insn ***pil)
+{
+    size_t i, j;
+    size_t count;
+
+    /* FIXME: inefficient. Do I care? */
+    count = 0;
+    if (pil)
+	*pil = NULL;
+    for (i = 0; i < s->nrmoves[n]; i++) {
+	for (j = 0; j < s->nmactive; j++) {
+	    if (s->mactive[j] == s->rmoves[n][i]) {
+		if (pil)
+		    lappend(pil, &count, s->rmoves[n][i]);
+		continue;
+	    }
+	}
+	for (j = 0; j < s->nwlmove; j++) {
+	    if (s->wlmove[j] == s->rmoves[n][i]) {
+		if (pil)
+		    lappend(pil, &count, s->rmoves[n][i]);
+		continue;
+	    }
+	}
+    }
+    return count;
+}
+
+static int moverelated(Isel *s, regid n)
+{
+    return nodemoves(s, n, NULL) != 0;
+}
+
+/* static */ void mkworklist(Isel *s)
+{
+    size_t i;
+
+    for (i = 0; i < maxregid; i++) {
+	if (bshas(s->prepainted, i))
+	    continue;
+	else if (s->degree[i] >= K)
+	    lappend(&s->wlspill, &s->nwlspill, locmap[i]);
+	else if (moverelated(s, i))
+	    lappend(&s->wlfreeze, &s->nwlfreeze, locmap[i]);
+	else
+	    lappend(&s->wlsimp, &s->nwlsimp, locmap[i]);
+    }
+}
+
+void enablemove(Isel *s, regid n)
+{
+    size_t i, j;
+    Insn **il;
+    size_t ni;
+
+    ni = nodemoves(s, n, &il);
+    for (i = 0; i < ni; i++) {
+	for (j = 0; j < s->nmactive; j++) {
+	    if (il[i] == s->mactive[j]) {
+		ldel(&s->mactive, &s->nmactive, j);
+		lappend(&s->wlmove, &s->nwlmove, il[i]);
+	    }
+	}
+    }
+}
+
+void decdegree(Isel *s, regid n)
+{
+    int d;
+    regid m;
+    Bitset *adj;
+
+    d = s->degree[n];
+    s->degree[n]--;
+
+    if (d == K) {
+	enablemove(s, n);
+	adj = adjacent(s, n);
+	for (m = 0; bsiter(adj, &m); m++)
+	    enablemove(s, n);
+	bsfree(adj);
+    }
+}
+
+void simp(Isel *s)
+{
+    Loc *l;
+    Bitset *adj;
+    regid m;
+
+    l = lpop(&s->wlsimp, &s->nwlsimp);
+    lappend(&s->selstk, &s->nselstk, l);
+    adj = adjacent(s, l->reg.id);
+    for (m = 0; bsiter(adj, &m); m++)
+	decdegree(s, m);
+    bsfree(adj);
+}
+
+regid getalias(Isel *s, regid id)
+{
+    while (1) {
+	if (!s->aliasmap[id])
+	    break;
+	id = s->aliasmap[id]->reg.id;
+    };
+    return id;
+}
+
+void wladd(Isel *s, regid u)
+{
+    size_t i;
+
+    if (bshas(s->prepainted, u))
+	return;
+    if (moverelated(s, u))
+	return;
+    if (s->degree[u] >= K)
+	return;
+    for (i = 0; i < s->nwlfreeze; i++)
+	if (s->wlfreeze[i]->reg.id == u)
+	    ldel(&s->wlfreeze, &s->nwlfreeze, i);
+    lappend(&s->wlsimp, &s->nwlsimp, locmap[u]);
+}
+
+int conservative(Isel *s, regid u, regid v)
+{
+    int k;
+    regid n;
+    size_t i;
+    Bitset *uadj;
+    Bitset *vadj;
+
+    uadj = adjacent(s, u);
+    vadj = adjacent(s, u);
+    k = 0;
+    for (i = 0; bsiter(uadj, &n); i++)
+	if (s->degree[n] >= K)
+	    k++;
+    for (i = 0; bsiter(vadj, &n); i++)
+	if (s->degree[n] >= K)
+	    k++;
+    bsfree(uadj);
+    bsfree(vadj);
+    return k < K;
+}
+
+/* FIXME: is this actually correct? */
+int ok(Isel *s, regid t, regid r)
+{
+    if (s->degree[t] >= K)
+	return 0;
+    if (!bshas(s->prepainted, t))
+	return 0;
+    if (!gbhasedge(s, t, r))
+	return 0;
+    return 1;
+}
+
+int combinable(Isel *s, regid u, regid v)
+{
+    regid t;
+    Bitset *adj;
+
+    /* if u isn't prepainted, can we conservatively coalesce? */
+    if (!bshas(s->prepainted, u) && conservative(s, u, v))
+	return 1;
+
+    /* if it is, are the adjacent nodes ok to combine with this? */
+    adj = adjacent(s, u);
+    for (t = 0; bsiter(adj, &t); t++)
+	if (!ok(s, t, u))
+	    return 0;
+    bsfree(adj);
+    return 1;
+}
+
+void combine(Isel *s, regid u, regid v)
+{
+    printf("Want to combine ");
+    locprint(stdout, locmap[u]);
+    printf(" with ");
+    locprint(stdout, locmap[v]);
+    printf("\n");
+}
+
+void coalesce(Isel *s)
+{
+    Insn *m;
+    regid u, v, tmp;
+
+    m = lpop(&s->wlmove, &s->nwlmove);
+    /* FIXME: src, dst? dst, src? Does it matter? */
+    u = getalias(s, m->args[0]->reg.id);
+    v = getalias(s, m->args[1]->reg.id);
+
+    if (bshas(s->prepainted, v)) {
+	tmp = u;
+	u = v;
+	v = tmp;
+    }
+
+    if (u == v) {
+	lappend(&s->mcoalesced, &s->nmcoalesced, m);
+	wladd(s, u);
+	wladd(s, v);
+    } else if (bshas(s->prepainted, v) || gbhasedge(s, u, v)) {
+	lappend(&s->mconstrained, &s->nmconstrained, m);
+	wladd(s, u);
+	wladd(s, v);
+    } else if (combinable(s, u, v)) {
+	lappend(&s->mcoalesced, &s->nmcoalesced, m);
+	combine(s, u, v);
+	wladd(s, u);
+    } else {
+	lappend(&s->mactive, &s->nmactive, m);
+    }
+}
+
+void freezemoves(Isel *s, regid u)
+{
+    die("Implement freeze moves\n");
+}
+
+void freeze(Isel *s)
+{
+    Loc *l;
+
+    l = lpop(&s->wlfreeze, &s->nwlfreeze);
+    lappend(&s->wlsimp, &s->nwlsimp, l);
+    freezemoves(s, l->reg.id);
+}
+
+void spill(Isel *s)
+{
+    die("Implement spilling\n");
+}
+
+int paint(Isel *s)
+{
+    int taken[K + 2]; /* esp, ebp aren't "real colours" */
+    Bitset *adj;
+    Loc *n, *w;
+    regid l;
+    size_t i;
+    int spilled;
+    int found;
+
+    spilled = 0;
+    while (s->nselstk) {
+	bzero(taken, K*sizeof(int));
+	n = lpop(&s->selstk, &s->nselstk);
+
+	adj = adjacent(s, n->reg.id);
+	for (l = 0; bsiter(adj, &l); l++) {
+	    w = locmap[getalias(s, l)];
+	    if (w->reg.colour)
+		taken[colourmap[w->reg.colour]] = 1;
+	}
+	bsfree(adj);
+
+	found = 0;
+	for (i = 0; i < K; i++) {
+	    if (!taken[i]) {
+		n->reg.colour = regmap[i][n->mode];
+		found = 1;
+		break;
+	    }
+	}
+	if (!found) {
+	    spilled = 1;
+	    bsput(s->spilled, n->reg.id);
+	}
+    }
+    for (l = 0; bsiter(s->coalesced, &l); l++) {
+	n = locmap[getalias(s, l)];
+	locmap[l]->reg.colour = n->reg.colour;
+    }
+    return spilled;
+}
+
+void rewrite(Isel *s)
+{
+    die("Rewrite spills!");
+}
+
+void regalloc(Isel *s)
+{
+    size_t i;
+    int spilled;
+
+    s->prepainted = mkbs();
+    for (i = 0; i < maxregid; i++)
+	if (locmap[i]->reg.colour)
+	    bsput(s->prepainted, i);
+    do {
+	setup(s);
+	liveness(s);
+	build(s);
+	mkworklist(s);
+	if (debug)
+	    dumpasm(s, stdout);
+	do {
+	    if (s->nwlsimp)
+		simp(s);
+	    else if (s->nwlmove)
+		coalesce(s);
+	    else if (s->nwlfreeze)
+		freeze(s);
+	    else if (s->nwlspill)
+		spill(s);
+	} while (s->nwlsimp || s->nwlmove || s->nwlfreeze || s->nwlspill);
+	spilled = paint(s);
+	if (spilled)
+	    rewrite(s);
+    } while (spilled);
+
+}
+
+static void setprint(FILE *fd, Bitset *s)
+{
+    char *sep;
+    size_t i;
+
+    sep = "";
+    for (i = 0; i < bsmax(s); i++) {
+	if (bshas(s, i)) {
+	    fprintf(fd, "%s%zd", sep, i);
+	    sep = ",";
+	}
+    }
+    fprintf(fd, "\n");
+}
+
+static void locsetprint(FILE *fd, Bitset *s)
+{
+    char *sep;
+    size_t i;
+
+    sep = "";
+    for (i = 0; i < bsmax(s); i++) {
+	if (bshas(s, i)) {
+	    fprintf(fd, "%s", sep);
+	    locprint(fd, locmap[i]);
+	    sep = ",";
+	}
+    }
+    fprintf(fd, "\n");
+}
+
+void dumpasm(Isel *s, FILE *fd)
+{
+    size_t i, j;
+    char *sep;
+    Asmbb *bb;
+
+    fprintf(fd, "IGRAPH ----- \n");
+    for (i = 0; i < maxregid; i++) {
+	for (j = i; j < maxregid; j++) {
+	    if (gbhasedge(s, i, j)) {
+		locprint(fd, locmap[i]);
+		fprintf(fd, " -- ");
+		locprint(fd, locmap[j]);
+		fprintf(fd, "\n");
+	    }
+	}
+    }
+    fprintf(fd, "ASM -------- \n");
+    for (j = 0; j < s->nbb; j++) {
+        bb = s->bb[j];
+        fprintf(fd, "\n");
+        fprintf(fd, "Bb: %d labels=(", bb->id);
+        sep = "";
+        for (i = 0; i < bb->nlbls; i++) {;
+            fprintf(fd, "%s%s", bb->lbls[i], sep);
+            sep = ",";
+        }
+        fprintf(fd, ")\n");
+
+        fprintf(fd, "Pred: ");
+	setprint(fd, bb->pred);
+        fprintf(fd, "Succ: ");
+	setprint(fd, bb->succ);
+
+        fprintf(fd, "Use: ");
+	locsetprint(fd, bb->use);
+        fprintf(fd, "Def: ");
+	locsetprint(fd, bb->def);
+        fprintf(fd, "Livein:  ");
+	locsetprint(fd, bb->livein);
+        fprintf(fd, "Liveout: ");
+	locsetprint(fd, bb->liveout);
+        for (i = 0; i < bb->ni; i++)
+            iprintf(fd, bb->il[i]);
+    }
+    fprintf(fd, "ENDASM -------- \n");
+}
+
--- a/8/reduce.c
+++ b/8/reduce.c
@@ -10,8 +10,8 @@
 #include <unistd.h>
 
 #include "parse.h"
-#include "asm.h"
 #include "opt.h"
+#include "asm.h"
 
 #include "platform.h" /* HACK. We need some platform specific code gen behavior. *sigh.* */
 
@@ -633,8 +633,7 @@
     fn.stksz = s.stksz;
     fn.locs = s.locs;
     fn.ret = s.ret;
-    fn.nl = s.stmts;
-    fn.nn = s.nstmts;
+    fn.cfg = cfg;
     genasm(fd, &fn, globls);
 }
 
--- a/8/regalloc.c
+++ b/8/regalloc.c
@@ -11,15 +11,16 @@
 #include <unistd.h>
 
 #include "parse.h"
+#include "opt.h"
 #include "asm.h"
 
-const Mode regmodes[] = {
+Mode regmodes[] = {
 #define Reg(r, name, mode) mode,
 #include "regs.def"
 #undef Reg
 };
 
-const char *regnames[] = {
+char *regnames[] = {
 #define Reg(r, name, mode) name,
 #include "regs.def"
 #undef Reg
@@ -52,8 +53,11 @@
     [Rebp] = {Rebp},
 };
 
-Loc *locstrlbl(Loc *l, char *lbl)
+Loc *locstrlbl(char *lbl)
 {
+    Loc *l;
+
+    l = zalloc(sizeof(Loc));
     l->type = Loclbl;
     l->mode = ModeL;
     l->lbl = strdup(lbl);
@@ -60,22 +64,44 @@
     return l;
 }
 
-Loc *loclbl(Loc *l, Node *lbl)
+Loc *loclbl(Node *lbl)
 {
     assert(lbl->type = Nlbl);
-    return locstrlbl(l, lbl->lbl.name);
+    return locstrlbl(lbl->lbl.name);
 }
 
-Loc *locreg(Loc *l, Reg r)
+Loc **locmap = NULL;
+size_t maxregid = 0;
+
+Loc *locreg(Mode m)
 {
+    Loc *l;
+
+    l = zalloc(sizeof(Loc));
     l->type = Locreg;
-    l->mode = regmodes[r];
-    l->reg = r;
+    l->mode = m;
+    l->reg.id = maxregid++;
+    locmap = xrealloc(locmap, maxregid * sizeof(Loc*));
+    locmap[l->reg.id] = l;
     return l;
 }
 
-Loc *locmem(Loc *l, long disp, Reg base, Reg idx, Mode mode)
+Loc *locphysreg(Reg r)
 {
+    static Loc *physregs[Nreg] = {0,};
+
+    if (physregs[r])
+        return physregs[r];
+    physregs[r] = locreg(regmodes[r]);
+    physregs[r]->reg.colour = r;
+    return physregs[r];
+}
+
+Loc *locmem(long disp, Loc *base, Loc *idx, Mode mode)
+{
+    Loc *l;
+
+    l = zalloc(sizeof(Loc));
     l->type = Locmem;
     l->mode = mode;
     l->mem.constdisp = disp;
@@ -85,15 +111,20 @@
     return l;
 }
 
-Loc *locmems(Loc *l, long disp, Reg base, Reg idx, int scale, Mode mode)
+Loc *locmems(long disp, Loc *base, Loc *idx, int scale, Mode mode)
 {
-    locmem(l, disp, base, idx, mode);
+    Loc *l;
+
+    l = locmem(disp, base, idx, mode);
     l->mem.scale = scale;
     return l;
 }
 
-Loc *locmeml(Loc *l, char *disp, Reg base, Reg idx, Mode mode)
+Loc *locmeml(char *disp, Loc *base, Loc *idx, Mode mode)
 {
+    Loc *l;
+
+    l = zalloc(sizeof(Loc));
     l->type = Locmem;
     l->mode = mode;
     l->mem.lbldisp = strdup(disp);
@@ -103,95 +134,23 @@
     return l;
 }
 
-Loc *locmemls(Loc *l, char *disp, Reg base, Reg idx, int scale, Mode mode)
+Loc *locmemls(char *disp, Loc *base, Loc *idx, int scale, Mode mode)
 {
-    locmeml(l, disp, base, idx, mode);
+    Loc *l;
+
+    l = locmeml(disp, base, idx, mode);
     l->mem.scale = scale;
     return l;
 }
 
 
-Loc *loclit(Loc *l, long val)
+Loc *loclit(long val)
 {
+    Loc *l;
+
+    l = zalloc(sizeof(Loc));
     l->type = Loclit;
     l->mode = ModeL; /* FIXME: what do we do for mode? */
     l->lit = val;
     return l;
-}
-
-void spill(Isel *s, Reg r)
-{
-    size_t i;
-    Loc l;
-    Node *n;
-
-    for (i = 0; i < Nmode; i++) {
-        n = s->rcontains[reginterferes[r][i]];
-        if (!n)
-            continue;
-        if (exprop(n) != Ovar) {
-            s->stksz += tysize(exprtype(n));
-            locmem(&s->locmap[n->nid], s->stksz, Rebp, Rnone, ModeL);
-            g(s, Imov, locreg(&l, r), &s->locmap[n->nid], NULL);
-        }
-        s->rcontains[i] = NULL;
-    }
-}
-
-void in(Isel *s, Node *n, Loc l)
-{
-    s->locmap[n->nid] = l;
-    if (l.type == Locreg)
-        s->rcontains[l.reg] = n;
-}
-
-Loc getreg(Isel *s, Mode m)
-{
-
-    Loc l;
-    int i;
-
-    assert(m != ModeNone);
-    l.reg = Rnone;
-    for (i = 0; i < Nreg; i++) {
-        if (!s->rtaken[i] && regmodes[i] == m) {
-            locreg(&l, i);
-            break;
-        }
-    }
-    if (l.reg == Rnone)
-        die("Not enough registers. Please split your expression and try again (FIXME: implement spilling)");
-    for (i = 0; i < Nmode; i++)
-        s->rtaken[reginterferes[l.reg][i]] = 1;
-
-    return l;
-}
-
-Loc claimreg(Isel *s, Reg r)
-{
-    Loc l;
-    int i;
-
-    if (s->rtaken[r])
-        spill(s, r);
-    for (i = 0; i < Nmode; i++)
-        s->rtaken[reginterferes[r][i]] = 1;
-    locreg(&l, r);
-    return l;
-}
-
-void freereg(Isel *s, Reg r)
-{
-    int i;
-
-    for (i = 0; i < Nmode; i++) {
-        s->rtaken[reginterferes[r][i]] = 0;
-        s->rcontains[reginterferes[r][i]] = NULL;
-    }
-}
-
-void freeloc(Isel *s, Loc l)
-{
-    if (l.type == Locreg)
-        freereg(s, l.reg);
 }
--- a/mk/c.mk
+++ b/mk/c.mk
@@ -1,9 +1,14 @@
-DEPSDIR = .deps
-DEPS=$(addprefix $(DEPSDIR)/, $(OBJ:.o=.d))
+.DEFAULT_GOAL=all
+_DEPSDIR = .deps
+_DEPS=$(addprefix $(_DEPSDIR)/, $(OBJ:.o=.d))
 
+_LIBSRCHPATHS=$(addprefix -L, $(dir $(DEPS)))
+_LIBINCPATHS=$(addprefix -I, $(dir $(DEPS)))
+_LIBPATHS=$(addprefix -l, $(patsubst lib%.a,%,$(notdir $(DEPS))))
+
 CFLAGS += -Wall -Werror -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
 CFLAGS += -g
-CFLAGS += -MMD -MP -MF ${DEPSDIR}/$(subst /,-,$*).d
+CFLAGS += -MMD -MP -MF ${_DEPSDIR}/$(subst /,-,$*).d
 
 .PHONY: clean clean-gen clean-bin clean-obj clean-misc clean-backups
 .PHONY: all
@@ -11,12 +16,19 @@
 all: subdirs $(BIN) $(LIB) $(EXTRA)
 install: subdirs-install install-bin install-lib install-hdr install-pc
 
-$(LIB): $(OBJ)
-	$(AR) -rcs $@ $^
+$(LIB): $(OBJ) libs
+	$(AR) -rcs $@ $(OBJ)
 
-$(BIN): $(OBJ) $(EXTRADEP)
-	$(CC) -o $@ $(OBJ) $(LDFLAGS) 
+$(BIN): $(OBJ) $(EXTRADEP) libs
+	$(CC) -o $@ $(OBJ) $(_LIBSRCHPATHS) $(_LIBPATHS)
 
+libs: $(DEPS)
+	@for i in $(dir $(DEPS)); do (\
+	    cd $$i && \
+	    $(MAKE) || \
+	    exit 1 \
+	) || exit 1; done
+
 subdirs:
 	@for i in $(SUB); do (\
 	    cd $$i && \
@@ -64,10 +76,10 @@
 	find ./ -name *.bak -exec rm -f {} \;
 
 %.o: %.c .deps
-	$(CC) -c $(CFLAGS) $<
+	$(CC) -c $(CFLAGS) $(_LIBINCPATHS) $<
 
 .deps: 
-	mkdir -p $(DEPSDIR)
+	mkdir -p $(_DEPSDIR)
 
 	
--include $(DEPS)
+-include $(_DEPS)
--- a/opt/Makefile
+++ b/opt/Makefile
@@ -2,9 +2,7 @@
 OBJ=cfg.o \
     df.o \
 
-CFLAGS+=-I../parse
-LDFLAGS+=-L../parse -lparse
-EXTRADEP=../parse/libparse.a
+DEPS=../parse/libparse.a
 
 include ../mk/lexyacc.mk
 include ../mk/c.mk
--- a/opt/cfg.c
+++ b/opt/cfg.c
@@ -16,13 +16,12 @@
 
 static Bb *mkbb(Cfg *cfg)
 {
-    static int nextbbid = 0;
     Bb *bb;
 
     bb = zalloc(sizeof(Bb));
-    bb->id = nextbbid++;
-    bb->in = mkbs();
-    bb->out = mkbs();
+    bb->id = cfg->nextbbid++;
+    bb->pred = mkbs();
+    bb->succ = mkbs();
     lappend(&cfg->bb, &cfg->nbb, bb);
     return bb;
 }
@@ -53,6 +52,7 @@
 Cfg *mkcfg(Node **nl, size_t nn)
 {
     Cfg *cfg;
+    Bb *pre, *post;
     Bb *bb, *targ;
     Node *a, *b;
     size_t i;
@@ -59,6 +59,7 @@
 
     cfg = zalloc(sizeof(Cfg));
     cfg->lblmap = mkht(strhash, streq);
+    pre = mkbb(cfg);
     bb = mkbb(cfg);
     for (i = 0; i < nn; i++) {
         switch (nl[i]->type) {
@@ -82,6 +83,11 @@
                 die("Invalid node type %s in mkcfg", nodestr(nl[i]->type));
         }
     }
+    post = mkbb(cfg);
+    bsput(pre->succ, cfg->bb[1]->id);
+    bsput(cfg->bb[1]->pred, pre->id);
+    bsput(cfg->bb[cfg->nbb - 2]->succ, post->id);
+    bsput(post->pred, cfg->bb[cfg->nbb - 2]->id);
     for (i = 0; i < cfg->nfixjmp; i++) {
         bb = cfg->fixblk[i];
         switch (exprop(cfg->fixjmp[i])) {
@@ -101,19 +107,20 @@
             targ = htget(cfg->lblmap, a->lbl.name);
             if (!targ)
                 die("No bb with label %s", a->lbl.name);
-            bsput(bb->out, targ->id);
-            bsput(targ->in, bb->id);
+            bsput(bb->succ, targ->id);
+            bsput(targ->pred, bb->id);
         }
         if (b) {
             targ = htget(cfg->lblmap, b->lbl.name);
             if (!targ)
                 die("No bb with label %s", b->lbl.name);
-            bsput(bb->out, targ->id);
-            bsput(targ->in, bb->id);
+            bsput(bb->succ, targ->id);
+            bsput(targ->pred, bb->id);
         }
     }
     return cfg;
 }
+
 void dumpcfg(Cfg *cfg, FILE *fd)
 {
     size_t i, j;
@@ -132,10 +139,10 @@
         fprintf(fd, ")\n");
 
         /* in edges */
-        fprintf(fd, "In:  ");
+        fprintf(fd, "Pred: ");
         sep = "";
-        for (i = 0; i < bsmax(bb->in); i++) {
-            if (bshas(bb->in, i)) {
+        for (i = 0; i < bsmax(bb->pred); i++) {
+            if (bshas(bb->pred, i)) {
                 fprintf(fd, "%s%zd", sep, i);
                 sep = ",";
             }
@@ -143,10 +150,10 @@
         fprintf(fd, "\n");
 
         /* out edges */
-        fprintf(fd, "Out: ");
+        fprintf(fd, "Succ: ");
         sep = "";
-        for (i = 0; i < bsmax(bb->out); i++) {
-             if (bshas(bb->out, i)) {
+        for (i = 0; i < bsmax(bb->succ); i++) {
+             if (bshas(bb->succ, i)) {
                 fprintf(fd, "%s%zd", sep, i);
                 sep = ",";
              }
--- a/opt/opt.h
+++ b/opt/opt.h
@@ -6,6 +6,7 @@
     size_t nbb;
 
     /* for building bb */
+    int nextbbid;
     Htab *lblmap; /* label => Bb mapping */
     Node **fixjmp;
     size_t nfixjmp;
@@ -19,8 +20,8 @@
     size_t nlbls;
     Node **nl;
     size_t nnl;
-    Bitset *in;
-    Bitset *out;
+    Bitset *pred;
+    Bitset *succ;
 };
 
 /* Takes a reduced block, and returns a flow graph. */
--- a/parse/bitset.c
+++ b/parse/bitset.c
@@ -7,7 +7,7 @@
 
 #include "parse.h"
 
-#define Uintbits (CHAR_BIT*sizeof(int))
+#define Sizetbits (CHAR_BIT*sizeof(size_t)) /* used in graph reprs */
 
 static void eqsz(Bitset *a, Bitset *b)
 {
@@ -17,9 +17,9 @@
         sz = a->nchunks;
     else
         sz = b->nchunks;
-    a->chunks = zrealloc(a->chunks, a->nchunks*sizeof(uint), sz*sizeof(uint));
+    a->chunks = zrealloc(a->chunks, a->nchunks*sizeof(size_t), sz*sizeof(size_t));
     a->nchunks = sz;
-    b->chunks = zrealloc(b->chunks, a->nchunks*sizeof(uint), sz*sizeof(uint));
+    b->chunks = zrealloc(b->chunks, a->nchunks*sizeof(size_t), sz*sizeof(size_t));
     b->nchunks = sz;
 }
 
@@ -29,21 +29,38 @@
 
     bs = xalloc(sizeof(Bitset));
     bs->nchunks = 1;
-    bs->chunks = zalloc(1*sizeof(uint));
+    bs->chunks = zalloc(1*sizeof(size_t));
     return bs;
 }
 
-Bitset *dupbs(Bitset *a)
+void bsfree(Bitset *bs)
 {
+    free(bs->chunks);
+    free(bs);
+}
+
+Bitset *bsdup(Bitset *a)
+{
     Bitset *bs;
 
     bs = xalloc(sizeof(Bitset));
     bs->nchunks = a->nchunks;
-    bs->chunks = xalloc(a->nchunks*sizeof(uint));
-    memcpy(bs->chunks, a->chunks, a->nchunks*sizeof(uint));
+    bs->chunks = xalloc(a->nchunks*sizeof(size_t));
+    memcpy(bs->chunks, a->chunks, a->nchunks*sizeof(size_t));
     return bs;
 }
 
+Bitset *bsclear(Bitset *bs)
+{
+    size_t i;
+
+    if (!bs)
+	return mkbs();
+    for (i = 0; i < bs->nchunks; i++)
+	bs->chunks[i] = 0;
+    return bs;
+}
+
 size_t bscount(Bitset *bs)
 {
     size_t i, j, n;
@@ -50,18 +67,31 @@
 
     n = 0;
     for (i = 0; i < bs->nchunks; i++)
-        for (j = 1; j < sizeof(uint)*CHAR_BIT; j++)
-            if (bs->chunks[i] & 1 << j)
+        for (j = 1; j < sizeof(size_t)*CHAR_BIT; j++)
+            if (bs->chunks[i] & 1ULL << j)
                 n++;
     return n;
 }
 
+int bsiter(Bitset *bs, size_t *elt)
+{
+    size_t i;
+
+    for (i = *elt; i < bsmax(bs); i++) {
+	if (bshas(bs, i)) {
+	    *elt = i;
+	    return 1;
+	}
+    }
+    return 0;
+}
+
 /* Returns the largest value that the bitset can possibly
  * hold. It's conservative, but scanning the entire bitset
  * is a bit slow. This is mostly an aid to iterate over it. */
 size_t bsmax(Bitset *bs)
 {
-    return bs->nchunks*sizeof(uint)*CHAR_BIT;
+    return bs->nchunks*Sizetbits;
 }
 
 void delbs(Bitset *bs)
@@ -70,29 +100,29 @@
     free(bs);
 }
 
-void bsput(Bitset *bs, uint elt)
+void bsput(Bitset *bs, size_t elt)
 {
     size_t sz;
-    if (elt >= bs->nchunks*Uintbits) {
-        sz = (elt/Uintbits)+1;
-        bs->chunks = zrealloc(bs->chunks, bs->nchunks*sizeof(uint), sz*sizeof(uint));
+    if (elt >= bs->nchunks*Sizetbits) {
+        sz = (elt/Sizetbits)+1;
+        bs->chunks = zrealloc(bs->chunks, bs->nchunks*sizeof(size_t), sz*sizeof(size_t));
         bs->nchunks = sz;
     }
-    bs->chunks[elt/Uintbits] |= 1 << (elt % Uintbits);
+    bs->chunks[elt/Sizetbits] |= 1ULL << (elt % Sizetbits);
 }
 
-void bsdel(Bitset *bs, uint elt)
+void bsdel(Bitset *bs, size_t elt)
 {
-    if (elt < bs->nchunks*Uintbits)
-        bs->chunks[elt/Uintbits] &= ~(1 << (elt % Uintbits));
+    if (elt < bs->nchunks*Sizetbits)
+        bs->chunks[elt/Sizetbits] &= ~(1ULL << (elt % Sizetbits));
 }
 
-int bshas(Bitset *bs, uint elt)
+int bshas(Bitset *bs, size_t elt)
 {
-    if (elt >= bs->nchunks*Uintbits)
+    if (elt >= bs->nchunks*Sizetbits)
         return 0;
     else
-        return bs->chunks[elt/Uintbits] & (1 << (elt % Uintbits));
+        return bs->chunks[elt/Sizetbits] & (1ULL << (elt % Sizetbits));
 }
 
 void bsunion(Bitset *a, Bitset *b)
@@ -120,6 +150,17 @@
     eqsz(a, b);
     for (i = 0; i < a->nchunks; i++)
         a->chunks[i] &= ~b->chunks[i];
+}
+
+int bseq(Bitset *a, Bitset *b)
+{
+    size_t i;
+
+    eqsz(a, b);
+    for (i = 0; i < a->nchunks; i++)
+	if (a->chunks[i] != b->chunks[i])
+	    return 0;
+    return 1;
 }
 
 int bsissubset(Bitset *set, Bitset *sub)
--- a/parse/infer.c
+++ b/parse/infer.c
@@ -155,9 +155,9 @@
         if (a->cstrs && b->cstrs)
             bsunion(b->cstrs, a->cstrs);
         else if (a->cstrs)
-            b->cstrs = dupbs(a->cstrs);
+            b->cstrs = bsdup(a->cstrs);
         else if (b->cstrs)
-            a->cstrs = dupbs(b->cstrs);
+            a->cstrs = bsdup(b->cstrs);
     } else {
         if (!cstrcheck(a, b))
             fatal(ctx->line, "%s incompatible with %s near %s", tystr(a), tystr(b), ctxstr(ctx));
--- a/parse/parse.h
+++ b/parse/parse.h
@@ -54,7 +54,7 @@
 
 struct Bitset {
     size_t nchunks;
-    uint *chunks;
+    size_t *chunks;
 };
 
 struct Htab {
@@ -226,17 +226,21 @@
 
 /* data structures */
 Bitset *mkbs(void);
-Bitset *dupbs(Bitset *bs);
+void    bsfree(Bitset *bs);
+Bitset *bsdup(Bitset *bs);
+Bitset *bsclear(Bitset *bs);
 void delbs(Bitset *bs);
-void bsput(Bitset *bs, uint elt);
-void bsdel(Bitset *bs, uint elt);
+void bsput(Bitset *bs, size_t elt);
+void bsdel(Bitset *bs, size_t elt);
 void bsunion(Bitset *a, Bitset *b);
 void bsintersect(Bitset *a, Bitset *b);
 void bsdiff(Bitset *a, Bitset *b);
-int  bshas(Bitset *bs, uint elt);
+int  bshas(Bitset *bs, size_t elt);
+int  bseq(Bitset *a, Bitset *b);
 int  bsissubset(Bitset *set, Bitset *sub);
-size_t bscount(Bitset *bs);
+int  bsiter(Bitset *bs, size_t *elt);
 size_t bsmax(Bitset *bs);
+size_t bscount(Bitset *bs);
 
 Htab *mkht(ulong (*hash)(void *key), int (*cmp)(void *k1, void *k2));
 int htput(Htab *ht, void *k, void *v);
@@ -363,6 +367,8 @@
 
 /* convenience func */
 void lappend(void *l, size_t *len, void *n); /* ugly hack; nl is void* because void*** is incompatible with T*** */
+void *lpop(void *l, size_t *len);
+void ldel(void *l, size_t *len, size_t idx);
 void lfree(void *l, size_t *len);
 
 /* Options to control the compilation */
--- a/parse/util.c
+++ b/parse/util.c
@@ -18,7 +18,7 @@
     void *mem;
 
     mem = calloc(1, sz);
-    if (!mem)
+    if (!mem && sz)
         die("Out of memory");
     return mem;
 }
@@ -29,7 +29,7 @@
     void *mem;
 
     mem = malloc(sz);
-    if (!mem)
+    if (!mem && sz)
         die("Out of memory");
     return mem;
 }
@@ -47,7 +47,7 @@
 void *xrealloc(void *mem, size_t sz)
 {
     mem = realloc(mem, sz);
-    if (!mem)
+    if (!mem && sz)
         die("Out of memory");
     return mem;
 }
@@ -100,17 +100,44 @@
 
     assert(n != NULL);
     pl = l;
-    *pl = xrealloc(*pl, (*len + 1)*sizeof(Node*));
+    *pl = xrealloc(*pl, (*len + 1)*sizeof(void*));
     (*pl)[*len] = n;
     (*len)++;
 }
 
+void *lpop(void *l, size_t *len)
+{
+    void ***pl;
+    void *v;
+
+    pl = l;
+    (*len)--;
+    v = (*pl)[*len];
+    *pl = xrealloc(*pl, *len * sizeof(void*));
+    return v;
+}
+
+void ldel(void *l, size_t *len, size_t idx)
+{
+    void ***pl;
+    size_t i;
+
+    assert(l != NULL);
+    assert(idx < *len);
+    pl = l;
+    for (i = idx; i < *len - 1; i++)
+	pl[i] = pl[i + 1];
+    (*len)--;
+    *pl = xrealloc(*pl, *len * sizeof(void*));
+}
+
+
 void lfree(void *l, size_t *len)
 {
     void ***pl;
 
+    assert(l != NULL);
     pl = l;
-    assert(pl != NULL);
     free(*pl);
     *pl = NULL;
     *len = 0;