shithub: mc

Download patch

ref: 7a66a41387eba0b82b09d6a8863610a3589f216d
parent: 75cf80b3680ec08c42145aa0f52d7c65dad33766
author: Ori Bernstein <[email protected]>
date: Thu Jun 14 22:21:28 EDT 2012

Get the register allocator closer to working.

    We abort in many cases, but we try to colour things.

--- a/8/asm.h
+++ b/8/asm.h
@@ -1,6 +1,8 @@
 #define Maxarg 4
 #define K 4 /* 4 general purpose regs with all modes available */
 
+typedef size_t regid;
+
 typedef struct Insn Insn;
 typedef struct Loc Loc;
 typedef struct Func Func;
@@ -52,7 +54,7 @@
     union {
         char *lbl;
         struct {
-            long  id;
+            regid id;
             Reg   colour;
         } reg;
         long  lit;
@@ -115,20 +117,36 @@
     Loc *stksz;
 
     /* register allocator state */
+    Bitset *initial; /* locations that need to be a specific colour */
     Bitset *prepainted; /* locations that need to be a specific colour */
 
     size_t *gbits;      /* igraph matrix repr */
     Bitset **gadj;      /* igraph adj set repr */
+    int *degree;        /* degree of nodes */
+    Loc **aliasmap;   /* mapping of aliases */
 
-    Insn ***moves;
-    size_t *nmoves;
-    Insn **activemoves;
-    size_t nactivemoves;
+    Loc  **selstk;
+    size_t nselstk;
 
-    Bitset *selstk;
     Bitset *coalesced;
+    Bitset *spilled;
 
-    int *degree;        /* degree of nodes */
+    Insn ***rmoves;
+    size_t *nrmoves;
+
+    /* move sets */
+    Insn **mcoalesced;
+    size_t nmcoalesced;
+
+    Insn **mconstrained;
+    size_t nmconstrained;
+
+    Insn **mfrozen;
+    size_t nmfrozen;
+
+    Insn **mactive;
+    size_t nmactive;
+
 
     /* worklists */
     Insn **wlmove;
--- a/8/ra.c
+++ b/8/ra.c
@@ -34,6 +34,37 @@
 #undef Def
 };
 
+Reg regmap[6][Nmode] = {
+    [0] = {Rnone, Ral, Rax, Reax},
+    [1] = {Rnone, Rcl, Rcx, Recx},
+    [2] = {Rnone, Rdl, Rdx, Redx},
+    [3] = {Rnone, Rbl, Rbx, Rebx},
+    [4] = {Rnone, Rnone, Rnone, Resp},
+    [5] = {Rnone, Rnone, Rnone, Rebp},
+};
+
+int colourmap[Nreg] = {
+    /* byte */
+    [Ral] = 0,
+    [Rcl] = 1,
+    [Rdl] = 2,
+    [Rbl] = 3,
+
+    /* word */
+    [Rax] = 0,
+    [Rcx] = 1,
+    [Rdx] = 2,
+    [Rbx] = 3,
+
+    /* dword */
+    [Reax] = 0,
+    [Recx] = 1,
+    [Redx] = 2,
+    [Rebx] = 3,
+    [Resp] = 4,
+    [Rebp] = 5,
+};
+
 static size_t uses(Insn *insn, long *u)
 {
     size_t i, j;
@@ -102,7 +133,7 @@
 
 static void udcalc(Asmbb *bb)
 {
-    /* up to 2 registers per memloc, so 
+    /* up to 2 registers per memloc, so
      * 2*Maxarg is the maximum number of
      * uses or defs we can see */
     long   u[2*Maxarg], d[2*Maxarg];
@@ -157,26 +188,28 @@
     }
 }
 
+/* we're only interested in register->register moves */
 static int ismove(Insn *i)
 {
-    return i->op == Imov;
+    if (i->op != Imov)
+	return 0;
+    return i->args[0]->type == Locreg && i->args[1]->type == Locreg;
 }
 
-/* static */ int gbhasedge(size_t *g, size_t u, size_t v)
+/* static */ int gbhasedge(Isel *s, size_t u, size_t v)
 {
     size_t i;
     i = (maxregid * v) + u;
-    return g[i/Sizetbits] & (1ULL <<(i % Sizetbits));
+    return s->gbits[i/Sizetbits] & (1ULL <<(i % Sizetbits));
 }
 
-static void gbputedge(size_t *g, size_t u, size_t v)
+static void gbputedge(Isel *s, size_t u, size_t v)
 {
     size_t i, j;
     i = (maxregid * v) + u;
     j = (maxregid * u) + v;
-    g[i/Sizetbits] |= 1ULL <<(i % Sizetbits);
-    g[j/Sizetbits] |= 1ULL <<(j % Sizetbits);
-    assert(!gbhasedge(g, 5, 5));
+    s->gbits[i/Sizetbits] |= 1ULL <<(i % Sizetbits);
+    s->gbits[j/Sizetbits] |= 1ULL <<(j % Sizetbits);
 }
 
 static void addedge(Isel *s, size_t u, size_t v)
@@ -183,7 +216,7 @@
 {
     if (u == v)
 	return;
-    gbputedge(s->gbits, u, v);
+    gbputedge(s, u, v);
     if (!bshas(s->prepainted, u)) {
 	bsput(s->gadj[u], v);
 	s->degree[u]++;
@@ -214,10 +247,19 @@
     free(s->gadj);
     s->gadj = gadj;
 
+    s->spilled = bsclear(s->spilled);
     s->prepainted = bsclear(s->prepainted);
+    s->coalesced = bsclear(s->coalesced);
+    /*
+    s->wlspill = bsclear(s->wlspill);
+    s->wlfreeze = bsclear(s->wlfreeze);
+    s->wlsimp = bsclear(s->wlsimp);
+    */
+
+    s->aliasmap = zalloc(maxregid * sizeof(size_t));
     s->degree = zalloc(maxregid * sizeof(int));
-    s->moves = zalloc(maxregid * sizeof(Loc **));
-    s->nmoves = zalloc(maxregid * sizeof(size_t));
+    s->rmoves = zalloc(maxregid * sizeof(Loc **));
+    s->nrmoves = zalloc(maxregid * sizeof(size_t));
 }
 
 static void build(Isel *s)
@@ -237,7 +279,6 @@
     bb = s->bb;
     nbb = s->nbb;
 
-    //s->graph = zalloc(maxregid * sizeof(size_t));
     for (i = 0; i < nbb; i++) {
 	live = bsdup(bb[i]->liveout);
 	for (j = bb[i]->ni - 1; j >= 0; j--) {
@@ -244,6 +285,9 @@
 	    insn = bb[i]->il[j];
 	    nu = uses(insn, u);
 	    nd = defs(insn, d);
+
+	    /* moves get special treatment, since we don't want spurious
+	     * edges between the src and dest */
 	    if (ismove(insn)) {
 		/* live \= uses(i) */
 		for (k = 0; k < nu; k++)
@@ -250,9 +294,9 @@
 		    bsdel(live, u[k]);
 
 		for (k = 0; k < nu; k++)
-		    lappend(&s->moves[u[k]], &s->nmoves[u[k]], insn);
+		    lappend(&s->rmoves[u[k]], &s->nrmoves[u[k]], insn);
 		for (k = 0; k < nd; k++)
-		    lappend(&s->moves[d[k]], &s->nmoves[d[k]], insn);
+		    lappend(&s->rmoves[d[k]], &s->nrmoves[d[k]], insn);
 		lappend(&s->wlmove, &s->nwlmove, insn);
 	    }
 	    for (k = 0; k < nd; k++)
@@ -265,43 +309,45 @@
     }
 }
 
-Bitset *adjacent(Isel *s, size_t n)
+Bitset *adjacent(Isel *s, regid n)
 {
     Bitset *r;
+    size_t i;
 
     r = bsdup(s->gadj[n]);
-    bsdiff(r, s->selstk);
+    for (i = 0; i < s->nselstk; i++)
+	bsdel(r, s->selstk[i]->reg.id);
     bsdiff(r, s->coalesced);
     return r;
 }
 
-size_t nodemoves(Isel *s, size_t nid, Insn ***pil)
+size_t nodemoves(Isel *s, regid n, Insn ***pil)
 {
     size_t i, j;
-    size_t n;
+    size_t count;
 
     /* FIXME: inefficient. Do I care? */
-    n = 0;
-    for (i = 0; i < s->nmoves[nid]; i++) {
-	for (j = 0; j < s->nactivemoves; j++) {
-	    if (s->activemoves[j] == s->moves[nid][i]) {
+    count = 0;
+    for (i = 0; i < s->nrmoves[n]; i++) {
+	for (j = 0; j < s->nmactive; j++) {
+	    if (s->mactive[j] == s->rmoves[n][i]) {
 		if (pil)
-		    lappend(pil, &n, s->moves[nid][i]);
+		    lappend(pil, &count, s->rmoves[n][i]);
 		continue;
 	    }
 	}
 	for (j = 0; j < s->nwlmove; j++) {
-	    if (s->wlmove[j] == s->moves[nid][i]) {
+	    if (s->wlmove[j] == s->rmoves[n][i]) {
 		if (pil)
-		    lappend(pil, &n, s->moves[nid][i]);
+		    lappend(pil, &count, s->rmoves[n][i]);
 		continue;
 	    }
 	}
     }
-    return n;
+    return count;
 }
 
-static int moverelated(Isel *s, size_t n)
+static int moverelated(Isel *s, regid n)
 {
     return nodemoves(s, n, NULL) != 0;
 }
@@ -311,7 +357,9 @@
     size_t i;
 
     for (i = 0; i < maxregid; i++) {
-	if (s->degree[i] >= K)
+	if (locmap[i]->reg.colour)
+	    bsput(s->prepainted, i);
+	else if (s->degree[i] >= K)
 	    lappend(&s->wlspill, &s->nwlspill, locmap[i]);
 	else if (moverelated(s, i))
 	    lappend(&s->wlfreeze, &s->nwlfreeze, locmap[i]);
@@ -320,40 +368,266 @@
     }
 }
 
+void enablemove(Isel *s, regid n)
+{
+    size_t i, j;
+    Insn **il;
+    size_t ni;
+
+    ni = nodemoves(s, n, &il);
+    for (i = 0; i < ni; i++) {
+	for (j = 0; j < s->nmactive; j++) {
+	    if (il[i] == s->mactive[j]) {
+		ldel(&s->mactive, &s->nmactive, j);
+		lappend(&s->wlmove, &s->nwlmove, il[i]);
+	    }
+	}
+    }
+}
+
+void decdegree(Isel *s, regid n)
+{
+    int d;
+    regid m;
+    Bitset *adj;
+
+    d = s->degree[n];
+    s->degree[n]--;
+
+    if (d == K) {
+	adj = adjacent(s, m);
+	enablemove(s, m);
+	for (m = 0; bsiter(adj, &m); m++)
+	    enablemove(s, n);
+	bsfree(adj);
+    }
+}
+
 void simp(Isel *s)
 {
+    Loc *l;
+    Bitset *adj;
+    regid m;
+
+    l = lpop(&s->wlsimp, &s->nwlsimp);
+    lappend(&s->selstk, &s->nselstk, l);
+    adj = adjacent(s, l->reg.id);
+    for (m = 0; bsiter(adj, &m); m++)
+	decdegree(s, m);
+    bsfree(adj);
 }
 
+regid getalias(Isel *s, regid id)
+{
+    while (1) {
+	if (!s->aliasmap[id])
+	    break;
+	id = s->aliasmap[id]->reg.id;
+    };
+    return id;
+}
+
+void wladd(Isel *s, regid u)
+{
+    size_t i;
+
+    if (bshas(s->prepainted, u))
+	return;
+    if (moverelated(s, u))
+	return;
+    if (s->degree[u] >= K)
+	return;
+    for (i = 0; i < s->nwlfreeze; i++)
+	if (s->wlfreeze[i]->reg.id == u)
+	    ldel(&s->wlfreeze, &s->nwlfreeze, i);
+    lappend(&s->wlsimp, &s->nwlsimp, locmap[u]);
+}
+
+int conservative(Isel *s, regid u, regid v)
+{
+    int k;
+    regid n;
+    size_t i;
+    Bitset *uadj;
+    Bitset *vadj;
+
+    uadj = adjacent(s, u);
+    vadj = adjacent(s, u);
+    k = 0;
+    for (i = 0; bsiter(uadj, &n); i++)
+	if (s->degree[n] >= K)
+	    k++;
+    for (i = 0; bsiter(vadj, &n); i++)
+	if (s->degree[n] >= K)
+	    k++;
+    bsfree(uadj);
+    bsfree(vadj);
+    return k < K;
+}
+
+/* FIXME: is this actually correct? */
+int ok(Isel *s, regid t, regid r)
+{
+    if (s->degree[t] >= K)
+	return 0;
+    if (!bshas(s->prepainted, t))
+	return 0;
+    if (!gbhasedge(s, t, r))
+	return 0;
+    return 1;
+}
+
+int combinable(Isel *s, regid u, regid v)
+{
+    regid t;
+    Bitset *adj;
+
+    /* if u isn't prepainted, can we conservatively coalesce? */
+    if (!bshas(s->prepainted, u) && conservative(s, u, v))
+	return 1;
+
+    /* if it is, are the adjacent nodes ok to combine with this? */
+    adj = adjacent(s, u);
+    for (t = 0; bsiter(adj, &t); t++)
+	if (!ok(s, t, u))
+	    return 0;
+    bsfree(adj);
+    return 1;
+}
+
+void combine(Isel *s, regid u, regid v)
+{
+    printf("Want to combine ");
+    locprint(stdout, locmap[u]);
+    printf(" with ");
+    locprint(stdout, locmap[v]);
+    printf("\n");
+}
+
 void coalesce(Isel *s)
 {
+    Insn *m;
+    regid u, v, tmp;
+
+    m = lpop(&s->wlmove, &s->nwlmove);
+    /* FIXME: src, dst? dst, src? Does it matter? */
+    u = getalias(s, m->args[0]->reg.id);
+    v = getalias(s, m->args[1]->reg.id);
+
+    if (bshas(s->prepainted, v)) {
+	tmp = u;
+	u = v;
+	v = tmp;
+    }
+
+    if (u == v) {
+	lappend(&s->mcoalesced, &s->nmcoalesced, m);
+	wladd(s, u);
+	wladd(s, v);
+    } else if (bshas(s->prepainted, v) || gbhasedge(s, u, v)) {
+	lappend(&s->mconstrained, &s->nmconstrained, m);
+	wladd(s, u);
+	wladd(s, v);
+    } else if (combinable(s, u, v)) {
+	lappend(&s->mcoalesced, &s->nmcoalesced, m);
+	combine(s, u, v);
+	wladd(s, u);
+    } else {
+	lappend(&s->mactive, &s->nmactive, m);
+    }
 }
 
+void freezemoves(Isel *s, regid u)
+{
+    die("Implement freeze moves\n");
+}
+
 void freeze(Isel *s)
 {
+    Loc *l;
+
+    l = lpop(&s->wlfreeze, &s->nwlfreeze);
+    lappend(&s->wlsimp, &s->nwlsimp, l);
+    freezemoves(s, l->reg.id);
 }
 
 void spill(Isel *s)
 {
+    die("Implement spilling\n");
 }
 
+int paint(Isel *s)
+{
+    int taken[K + 2]; /* esp, ebp aren't "real colours" */
+    Bitset *adj;
+    Loc *n, *w;
+    regid l;
+    size_t i;
+    int spilled;
+    int found;
+
+    spilled = 0;
+    while (s->nselstk) {
+	bzero(taken, K*sizeof(int));
+	n = lpop(&s->selstk, &s->nselstk);
+
+	adj = adjacent(s, n->reg.id);
+	for (l = 0; bsiter(adj, &l); l++) {
+	    w = locmap[getalias(s, l)];
+	    if (w->reg.colour)
+		taken[colourmap[w->reg.colour]] = 1;
+	}
+	bsfree(adj);
+
+	found = 0;
+	for (i = 0; i < K; i++) {
+	    if (!taken[i]) {
+		n->reg.colour = regmap[i][n->mode];
+		found = 1;
+	    }
+	}
+	if (!found) {
+	    spilled = 1;
+	    bsput(s->spilled, n->reg.id);
+	}
+    }
+    for (l = 0; bsiter(s->coalesced, &l); l++) {
+	n = locmap[getalias(s, l)];
+	locmap[l]->reg.colour = n->reg.colour;
+    }
+    return spilled;
+}
+
+void rewrite(Isel *s)
+{
+    die("Rewrite spills!");
+}
+
 void regalloc(Isel *s)
 {
-    liveness(s);
-    build(s);
-    if (debug)
-	dumpasm(s, stdout);
+    int spilled;
+
     do {
-	if (s->nwlsimp)
-	    simp(s);
-	else if (s->nwlmove)
-	    coalesce(s);
-	else if (s->nwlfreeze)
-	    freeze(s);
-	else if (s->nwlspill)
-	    spill(s);
-	break;
-    } while (s->nwlsimp || s->nwlmove || s->nwlfreeze || s->nwlspill);
-    mkworklist(s);
+	liveness(s);
+	build(s);
+	mkworklist(s);
+	if (debug)
+	    dumpasm(s, stdout);
+	do {
+	    if (s->nwlsimp)
+		simp(s);
+	    else if (s->nwlmove)
+		coalesce(s);
+	    else if (s->nwlfreeze)
+		freeze(s);
+	    else if (s->nwlspill)
+		spill(s);
+	} while (s->nwlsimp || s->nwlmove || s->nwlfreeze || s->nwlspill);
+	spilled = paint(s);
+	if (spilled)
+	    rewrite(s);
+    } while (spilled);
+
 }
 
 static void setprint(FILE *fd, Bitset *s)
@@ -396,7 +670,7 @@
     fprintf(fd, "IGRAPH ----- \n");
     for (i = 0; i < maxregid; i++) {
 	for (j = i; j < maxregid; j++) {
-	    if (gbhasedge(s->gbits, i, j)) {
+	    if (gbhasedge(s, i, j)) {
 		locprint(fd, locmap[i]);
 		fprintf(fd, " -- ");
 		locprint(fd, locmap[j]);
--- a/parse/bitset.c
+++ b/parse/bitset.c
@@ -33,6 +33,12 @@
     return bs;
 }
 
+void bsfree(Bitset *bs)
+{
+    free(bs->chunks);
+    free(bs);
+}
+
 Bitset *bsdup(Bitset *a)
 {
     Bitset *bs;
--- a/parse/parse.h
+++ b/parse/parse.h
@@ -224,6 +224,7 @@
 
 /* data structures */
 Bitset *mkbs(void);
+void    bsfree(Bitset *bs);
 Bitset *bsdup(Bitset *bs);
 Bitset *bsclear(Bitset *bs);
 void delbs(Bitset *bs);
@@ -364,6 +365,8 @@
 
 /* convenience func */
 void lappend(void *l, size_t *len, void *n); /* ugly hack; nl is void* because void*** is incompatible with T*** */
+void *lpop(void *l, size_t *len);
+void ldel(void *l, size_t *len, size_t idx);
 void lfree(void *l, size_t *len);
 
 /* Options to control the compilation */
--- a/parse/util.c
+++ b/parse/util.c
@@ -18,7 +18,7 @@
     void *mem;
 
     mem = calloc(1, sz);
-    if (!mem)
+    if (!mem && sz)
         die("Out of memory");
     return mem;
 }
@@ -29,7 +29,7 @@
     void *mem;
 
     mem = malloc(sz);
-    if (!mem)
+    if (!mem && sz)
         die("Out of memory");
     return mem;
 }
@@ -47,7 +47,7 @@
 void *xrealloc(void *mem, size_t sz)
 {
     mem = realloc(mem, sz);
-    if (!mem)
+    if (!mem && sz)
         die("Out of memory");
     return mem;
 }
@@ -100,17 +100,44 @@
 
     assert(n != NULL);
     pl = l;
-    *pl = xrealloc(*pl, (*len + 1)*sizeof(Node*));
+    *pl = xrealloc(*pl, (*len + 1)*sizeof(void*));
     (*pl)[*len] = n;
     (*len)++;
 }
 
+void *lpop(void *l, size_t *len)
+{
+    void ***pl;
+    void *v;
+
+    pl = l;
+    (*len)--;
+    v = (*pl)[*len];
+    *pl = xrealloc(*pl, *len * sizeof(void*));
+    return v;
+}
+
+void ldel(void *l, size_t *len, size_t idx)
+{
+    void ***pl;
+    size_t i;
+
+    assert(l != NULL);
+    assert(idx < *len);
+    pl = l;
+    for (i = idx; i < *len - 1; i++)
+	pl[i] = pl[i + 1];
+    (*len)--;
+    *pl = xrealloc(*pl, *len * sizeof(void*));
+}
+
+
 void lfree(void *l, size_t *len)
 {
     void ***pl;
 
+    assert(l != NULL);
     pl = l;
-    assert(pl != NULL);
     free(*pl);
     *pl = NULL;
     *len = 0;
--- a/test/test.sh
+++ b/test/test.sh
@@ -8,7 +8,7 @@
     echo $MC $1.myr && \
     $MC $1.myr && \
     mv a.s $1.s #&& \
-#    cc $ASOPT -m32 -o $1 $1.s
+    cc $ASOPT -m32 -o $1 $1.s
 }
 
 function prints {