ref: 538f78b722586f02734d53e8a90d3227c9ad22ed
parent: 45e91dd432edaa124a2699b5d35a3a86f6eedfad
author: Ori Bernstein <[email protected]>
date: Sat May 24 19:24:23 EDT 2014
Don't iterate over bit sets as much. We shouldn't need O(n) bit set iteration when we can just keep a proper adjacency list.
--- a/6/asm.h
+++ b/6/asm.h
@@ -137,7 +137,8 @@
Bitset *initial; /* initial set of locations used by this fn */
size_t *gbits; /* igraph matrix repr */
- Bitset **gadj; /* igraph adj set repr */
+ regid **_gadj; /* igraph adj set repr */
+ size_t *ngadj;
int *degree; /* degree of nodes */
Loc **aliasmap; /* mapping of aliases */
--- a/6/ra.c
+++ b/6/ra.c
@@ -349,6 +349,13 @@
return 1;
}
+static void alputedge(Isel *s, regid u, regid v)
+{
+ s->ngadj[u]++;
+ s->_gadj[u] = xrealloc(s->_gadj[u], s->ngadj[u]*sizeof(regid));
+ s->_gadj[u][s->ngadj[u] - 1] = v;
+}
+
static void addedge(Isel *s, regid u, regid v)
{
if (u == v || gbhasedge(s, u, v))
@@ -361,11 +368,11 @@
gbputedge(s, u, v);
gbputedge(s, v, u);
if (!bshas(s->prepainted, u)) {
- bsput(s->gadj[u], v);
+ alputedge(s, u, v);
s->degree[u] += degreechange(s, v, u);
}
if (!bshas(s->prepainted, v)) {
- bsput(s->gadj[v], u);
+ alputedge(s, v, u);
s->degree[v] += degreechange(s, u, v);
}
}
@@ -379,10 +386,10 @@
gchunks = (maxregid*maxregid)/Sizetbits + 1;
s->gbits = zalloc(gchunks*sizeof(size_t));
/* fresh adj list repr. */
- free(s->gadj);
- s->gadj = zalloc(maxregid * sizeof(Bitset*));
- for (i = 0; i < maxregid; i++)
- s->gadj[i] = mkbs();
+ free(s->_gadj);
+ free(s->ngadj);
+ s->_gadj = zalloc(maxregid * sizeof(regid*));
+ s->ngadj = zalloc(maxregid * sizeof(size_t));
s->spilled = bsclear(s->spilled);
s->coalesced = bsclear(s->coalesced);
@@ -470,22 +477,16 @@
}
}
-static int adjiter(Isel *s, regid n, regid *m)
+static int adjavail(Isel *s, regid r)
{
- size_t i, r;
+ size_t i;
- for (r = *m; bsiter(s->gadj[n], &r); r++) {
- for (i = 0; i < s->nselstk; i++)
- if (r == s->selstk[i]->reg.id)
- goto next;
- if (bshas(s->coalesced, r))
- goto next;
- *m = r;
- return 1;
-next:
- continue;
- }
- return 0;
+ if (bshas(s->coalesced, r))
+ return 0;
+ for (i = 0; i < s->nselstk; i++)
+ if (r == s->selstk[i]->reg.id)
+ return 0;
+ return 1;
}
static size_t nodemoves(Isel *s, regid n, Insn ***pil)
@@ -561,7 +562,7 @@
{
int before, after;
int found;
- size_t idx;
+ size_t idx, i;
regid n;
assert(m < maxregid);
@@ -571,8 +572,11 @@
if (before != after) {
enablemove(s, m);
- for (n = 0; adjiter(s, m, &n); n++)
- enablemove(s, n);
+ for (i = 0; i < s->ngadj[m]; i++) {
+ n = s->_gadj[m][i];
+ if (adjavail(s, n))
+ enablemove(s, n);
+ }
/* Subtle:
*
@@ -606,11 +610,14 @@
{
Loc *l;
regid m;
+ size_t i;
l = lpop(&s->wlsimp, &s->nwlsimp);
lappend(&s->selstk, &s->nselstk, l);
- for (m = 0; adjiter(s, l->reg.id, &m); m++) {
- decdegree(s, m);
+ for (i = 0; i < s->ngadj[l->reg.id]; i++) {
+ m = s->_gadj[l->reg.id][i];
+ if (adjavail(s, m))
+ decdegree(s, m);
}
}
@@ -643,15 +650,20 @@
static int conservative(Isel *s, regid u, regid v)
{
int k;
+ size_t i;
regid n;
k = 0;
- for (n = 0; adjiter(s, u, &n); n++)
- if (!istrivial(s, n))
+ for (i = 0; i < s->ngadj[u]; i++) {
+ n = s->_gadj[u][i];
+ if (adjavail(s, n) && !istrivial(s, n))
k++;
- for (n = 0; adjiter(s, v, &n); n++)
- if (!istrivial(s, n))
+ }
+ for (i = 0; i < s->ngadj[v]; i++) {
+ n = s->_gadj[v][i];
+ if (adjavail(s, n) && !istrivial(s, n))
k++;
+ }
return k < _K[rclass(locmap[u])];
}
@@ -664,6 +676,7 @@
static int combinable(Isel *s, regid u, regid v)
{
regid t;
+ size_t i;
/* Regs of different modes can't be combined as things stand.
* In principle they should be combinable, but it confused the
@@ -675,9 +688,11 @@
return 1;
/* if it is, are the adjacent nodes ok to combine with this? */
- for (t = 0; adjiter(s, v, &t); t++)
- if (!ok(s, t, u))
+ for (i = 0; i < s->ngadj[v]; i++) {
+ t = s->_gadj[v][i];
+ if (adjavail(s, t) && !ok(s, t, u))
return 0;
+ }
return 1;
}
@@ -711,7 +726,10 @@
lappend(&s->rmoves[u], &s->nrmoves[u], s->rmoves[v][i]);
}
- for (t = 0; adjiter(s, v, &t); t++) {
+ for (i = 0; i < s->ngadj[v]; i++) {
+ t = s->_gadj[v][i];
+ if (!adjavail(s, t))
+ continue;
if (debugopt['r'] > 2)
printedge(stdout, "combine-putedge:", t, u);
addedge(s, t, u);
@@ -847,7 +865,7 @@
int taken[Nreg];
Loc *n, *w;
regid l;
- int i;
+ size_t i, j;
int spilled;
int found;
@@ -856,7 +874,8 @@
bzero(taken, Nreg*sizeof(int));
n = lpop(&s->selstk, &s->nselstk);
- for (l = 0; bsiter(s->gadj[n->reg.id], &l); l++) {
+ for (j = 0; j < s->ngadj[n->reg.id];j++) {
+ l = s->_gadj[n->reg.id][j];
if (debugopt['r'] > 1)
printedge(stdout, "paint-edge:", n->reg.id, l);
w = locmap[getalias(s, l)];
--- a/parse/bitset.c
+++ b/parse/bitset.c
@@ -158,13 +158,6 @@
bs->chunks[elt/Sizetbits] &= ~(1ULL << (elt % Sizetbits));
}
-int bshas(Bitset *bs, size_t elt)
-{
- if (elt >= bs->nchunks*Sizetbits)
- return 0;
- else
- return (bs->chunks[elt/Sizetbits] & (1ULL << (elt % Sizetbits))) != 0;
-}
void bsunion(Bitset *a, Bitset *b)
{
--- a/parse/parse.h
+++ b/parse/parse.h
@@ -336,12 +336,18 @@
void bsunion(Bitset *a, Bitset *b);
void bsintersect(Bitset *a, Bitset *b);
void bsdiff(Bitset *a, Bitset *b);
-int bshas(Bitset *bs, size_t elt);
int bseq(Bitset *a, Bitset *b);
int bsissubset(Bitset *set, Bitset *sub);
int bsiter(Bitset *bs, size_t *elt);
size_t bsmax(Bitset *bs);
size_t bscount(Bitset *bs);
+/* inline for speed */
+static inline int bshas(Bitset *bs, size_t elt)
+{
+ if (elt >= bs->nchunks*8*sizeof(size_t))
+ return 0;
+ return (bs->chunks[elt/(8*sizeof(size_t))] & (1ULL << (elt % (8*sizeof(size_t))))) != 0;
+}
Htab *mkht(ulong (*hash)(void *key), int (*cmp)(void *k1, void *k2));
void htfree(Htab *ht);