ref: e35235a26e740630ee9d5cdb9564f7248d19df93
parent: 026ccbd147926e470ff77a5f1bc613dad7017799
author: Ori Bernstein <[email protected]>
date: Tue Sep 2 12:21:44 EDT 2014
Switch to much faster register based blitting. Most values are small. Don't use rep movs. It's fast on many uarches, but has high fixed cost. 5x speedup in intsort. Fuck yeah.
--- a/6/isel.c
+++ b/6/isel.c
@@ -374,25 +374,31 @@
static void blit(Isel *s, Loc *to, Loc *from, size_t dstoff, size_t srcoff, size_t sz)
{
- Loc *sp, *dp, *len; /* pointers to src, dst */
+ /* Emit an unrolled copy of sz bytes from (from + srcoff) to
+ * (to + dstoff). Every chunk is moved as a load into a temporary
+ * followed by a store from it, replacing the old rep movsb sequence.
+ * Nothing is emitted when sz is 0. */
+ size_t i;
+ Loc *sp, *dp; /* pointers to src, dst */
+ Loc *tmp, *src, *dst; /* copy temporary; source memory, dst memory */
- len = loclit(sz, ModeQ);
- sp = newr(s, from);
- dp = newr(s, to);
+ sp = inr(s, from);
+ dp = inr(s, to);
- /* length to blit */
- g(s, Imov, len, locphysreg(Rrcx), NULL);
- /* source address with offset */
- if (srcoff)
- g(s, Ilea, locmem(srcoff, sp, NULL, ModeQ), locphysreg(Rrsi), NULL);
- else
- g(s, Imov, sp, locphysreg(Rrsi), NULL);
- /* dest address with offset */
- if (dstoff)
- g(s, Ilea, locmem(dstoff, dp, NULL, ModeQ), locphysreg(Rrdi), NULL);
- else
- g(s, Imov, dp, locphysreg(Rrdi), NULL);
- g(s, Irepmovsb, NULL);
+ /* Slightly funny loop condition: We might have trailing bytes
+ * that we can't blit word-wise. */
+ tmp = locreg(ModeQ);
+ for (i = 0; i < sz/Ptrsz; i++) {
+ src = locmem(i*Ptrsz + srcoff, sp, NULL, ModeQ);
+ dst = locmem(i*Ptrsz + dstoff, dp, NULL, ModeQ);
+ g(s, Imov, src, tmp, NULL);
+ g(s, Imov, tmp, dst, NULL);
+ }
+ /* now, the trailing bytes */
+ tmp = locreg(ModeB);
+ i *= Ptrsz; /* we counted in Ptrsz chunks; now we need a byte offset */
+ for (; i < sz; i++) {
+ /* i is relative to the start of the blit, so the caller's
+ * offsets apply here exactly as in the word-wise loop */
+ src = locmem(i + srcoff, sp, NULL, ModeB);
+ dst = locmem(i + dstoff, dp, NULL, ModeB);
+ g(s, Imov, src, tmp, NULL);
+ g(s, Imov, tmp, dst, NULL);
+ }
}
static int isfunc(Isel *s, Node *n)
--- a/bench/Makefile
+++ b/bench/Makefile
@@ -6,8 +6,13 @@
include ../config.mk
include ../mk/c.mk
-bench: runner $(BENCHSRC:.myr=)
+# Rebuild every benchmark from scratch, then run ./runner on the results.
+bench: runner cleanbuild
./runner $(BENCHSRC:.myr=)
-$(BENCHSRC:.myr=): $(BENCHSRC)
- ../myrbuild/myrbuild -b $@ $@.myr
+.PHONY: cleanbuild
+# Remove the old benchmark binaries and build products, then rebuild each
+# benchmark. The loop exits on the first failed build so that make reports
+# the error instead of carrying on with the remaining benchmarks.
+cleanbuild:
+ rm -f $(BENCHSRC:.myr=) $(BENCHSRC:.myr=.o) $(BENCHSRC:.myr=.use)
+ @for i in $(BENCHSRC:.myr=); do \
+ ../myrbuild/myrbuild -b $$i $$i.myr || exit 1; \
+ done
+