ref: 64f02da800fa67fc90084cdef6c6fee0dc24e9dd
parent: 4bc5bb61778432697dc3ce477335ada578cf3474
author: Ori Bernstein <[email protected]>
date: Wed Mar 20 09:47:56 EDT 2013
Use 'rep movs' in order to do blits. It's shorter, easier to read, and faster.
--- a/6/isel.c
+++ b/6/isel.c
@@ -340,31 +340,34 @@
static void blit(Isel *s, Loc *to, Loc *from, size_t dstoff, size_t srcoff, size_t sz)
{
- size_t i;
- Loc *sp, *dp; /* pointers to src, dst */
- Loc *tmp, *src, *dst; /* source memory, dst memory */
+ AsmOp op; /* which rep movs variant to emit; selected from sz below */
+ Loc *sp, *dp, *len; /* pointers to src, dst; literal element count */
+ if (sz % 8 == 0) { /* use the widest element (8, 4, 2, else 1 byte) that evenly divides sz */
+ sz /= 8; /* from here on sz counts elements, not bytes */
+ op = Irepmovsq;
+ } else if (sz % 4 == 0) {
+ sz /= 4;
+ op = Irepmovsl;
+ } else if (sz % 2 == 0) {
+ sz /= 2;
+ op = Irepmovsw;
+ } else {
+ op = Irepmovsb; /* odd size: byte elements, so sz is already the count */
+ }
+
+ len = loclit(sz, ModeQ); /* element count after the division above */
sp = inr(s, from);
dp = inr(s, to);
- /* Slightly funny loop condition: We might have trailing bytes
- * that we can't blit word-wise. */
- tmp = locreg(ModeQ);
- for (i = 0; i < sz/Ptrsz; i++) {
- src = locmem(i*Ptrsz + srcoff, sp, NULL, ModeQ);
- dst = locmem(i*Ptrsz + dstoff, dp, NULL, ModeQ);
- g(s, Imov, src, tmp, NULL);
- g(s, Imov, tmp, dst, NULL);
- }
- /* now, the trailing bytes */
- tmp = locreg(ModeB);
- i *= Ptrsz; /* we counted in Ptrsz chunks; now we need a byte offset */
- for (; i < sz; i++) {
- src = locmem(i, sp, NULL, ModeB);
- dst = locmem(i, dp, NULL, ModeB);
- g(s, Imov, src, tmp, NULL);
- g(s, Imov, tmp, dst, NULL);
- }
+ g(s, Imov, len, locphysreg(Rrcx), NULL); /* number of elements to blit (not bytes) */
+ g(s, Imov, sp, locphysreg(Rrsi), NULL); /* source index */
+ g(s, Imov, dp, locphysreg(Rrdi), NULL); /* dest index */
+ if (srcoff) /* no add is emitted for a zero offset */
+ g(s, Iadd, loclit(srcoff, ModeQ), locphysreg(Rrsi), NULL); /* advance source by srcoff */
+ if (dstoff) /* no add is emitted for a zero offset */
+ g(s, Iadd, loclit(dstoff, ModeQ), locphysreg(Rrdi), NULL); /* advance dest by dstoff */
+ g(s, op, NULL); /* emit the copy; no explicit operands are passed, it works on the rcx/rsi/rdi values loaded above */
}
static Loc *gencall(Isel *s, Node *n)