ref: 84d48fceb5f71ed438aec8b7fa0576f0b9c7eb66
parent: 094074d7ecb008cd3d59dd0d011387299a9fe9c8
author: Ori Bernstein <[email protected]>
date: Tue Sep 2 12:21:44 EDT 2014
Switch to much faster register-based blitting. Most values are small. Don't use rep movs: it's fast on many uarches, but has a high fixed cost. 5x speedup in intsort. Fuck yeah.
--- a/6/isel.c
+++ b/6/isel.c
@@ -374,25 +374,31 @@
static void blit(Isel *s, Loc *to, Loc *from, size_t dstoff, size_t srcoff, size_t sz)
{
- Loc *sp, *dp, *len; /* pointers to src, dst */
+ size_t i; /* word index in the first loop, byte offset in the second */
+ Loc *sp, *dp; /* pointers to src, dst */
+ Loc *tmp, *src, *dst; /* source memory, dst memory */
- len = loclit(sz, ModeQ);
- sp = newr(s, from);
- dp = newr(s, to);
+ sp = inr(s, from);
+ dp = inr(s, to);
- /* length to blit */
- g(s, Imov, len, locphysreg(Rrcx), NULL);
- /* source address with offset */
- if (srcoff)
- g(s, Ilea, locmem(srcoff, sp, NULL, ModeQ), locphysreg(Rrsi), NULL);
- else
- g(s, Imov, sp, locphysreg(Rrsi), NULL);
- /* dest address with offset */
- if (dstoff)
- g(s, Ilea, locmem(dstoff, dp, NULL, ModeQ), locphysreg(Rrdi), NULL);
- else
- g(s, Imov, dp, locphysreg(Rrdi), NULL);
- g(s, Irepmovsb, NULL);
+ /* Move sz bytes from sp+srcoff to dp+dstoff via tmp: whole Ptrsz-sized words first.
+ * Slightly funny loop condition: We might have trailing bytes that we can't blit word-wise. */
+ tmp = locreg(ModeQ);
+ for (i = 0; i < sz/Ptrsz; i++) {
+ src = locmem(i*Ptrsz + srcoff, sp, NULL, ModeQ);
+ dst = locmem(i*Ptrsz + dstoff, dp, NULL, ModeQ);
+ g(s, Imov, src, tmp, NULL);
+ g(s, Imov, tmp, dst, NULL);
+ }
+ /* now, the trailing bytes; they must honor srcoff/dstoff just like the words above */
+ tmp = locreg(ModeB);
+ i *= Ptrsz; /* we counted in Ptrsz chunks; now we need a byte offset */
+ for (; i < sz; i++) {
+ src = locmem(i + srcoff, sp, NULL, ModeB);
+ dst = locmem(i + dstoff, dp, NULL, ModeB);
+ g(s, Imov, src, tmp, NULL);
+ g(s, Imov, tmp, dst, NULL);
+ }
}
static int isfunc(Isel *s, Node *n)
--- a/bench/Makefile
+++ b/bench/Makefile
@@ -6,8 +6,13 @@
include ../config.mk
include ../mk/c.mk
-bench: runner $(BENCHSRC:.myr=)
+bench: runner cleanbuild
./runner $(BENCHSRC:.myr=)
-$(BENCHSRC:.myr=): $(BENCHSRC)
- ../myrbuild/myrbuild -b $@ $@.myr
+.PHONY: cleanbuild # removes and recompiles every benchmark on each run
+cleanbuild:
+ rm -f $(BENCHSRC:.myr=) $(BENCHSRC:.myr=.o) $(BENCHSRC:.myr=.use)
+ @for i in $(BENCHSRC:.myr=); do \
+ ../myrbuild/myrbuild -b $$i $$i.myr || exit 1; \
+ done
+