shithub: mc

Download patch

ref: ba8dedb8ae3549451ae889495b6c9102dfbbcf74
parent: 55f9f36fd0a2dd5378f1a71a82aa9eb1b7f69562
parent: 64e3b36c8a1cc2c10248a4761e092234b8eccfe2
author: Ori Bernstein <[email protected]>
date: Mon Dec 30 10:00:11 EST 2013

Merge branch 'master' of git+ssh://git.eigenstate.org/git/ori/mc

Conflicts:
	parse/infer.c

--- a/6/insns.def
+++ b/6/insns.def
@@ -2,12 +2,13 @@
    is defined by the following macro:
         Insn(enumval, fmt, attr)
     The format string 'fmt' has the following expansions:
-        %r            - reg
+        %r            - int reg
+        %f            - xmm reg
         %m            - mem
         %i            - imm
         %v            - reg/mem
         %u            - reg/imm
-        %x            - reg/mem/imm
+        %x            - reg/freg/mem/imm
         %[1-9]*t      - Mode of an operand. The optional number
                         preceeding it is the operand desired for
                         the mode.
@@ -20,9 +21,10 @@
 
 Insn(Inone,     "BAD_INSN",                     Use(), Def())
 /* Note, the mov instruction is specified in an overly general manner. */
-Insn(Imov,      "\tmov%t %x,%x\n",             Use(.l={1}),                    Def(.l={2}))
-Insn(Imovz,     "\tmovz%1t%2t %x,%x\n",         Use(.l={1}),                    Def(.l={2}))
-Insn(Imovs,     "\tmovs%1t%2t %x,%x\n",         Use(.l={1}),                    Def(.l={2}))
+Insn(Imov,      "\tmov%t %x,%x\n",              Use(.l={1}),                    Def(.l={2}))
+Insn(Imovt,      "PSEUDO: TRUNCATE\n",          Use(.l={1}),                    Def(.l={2}))
+Insn(Imovzx,    "\tmovz%1t%2t %x,%x\n",         Use(.l={1}),                    Def(.l={2}))
+Insn(Imovsx,    "\tmovs%1t%2t %x,%x\n",         Use(.l={1}),                    Def(.l={2}))
 Insn(Irepmovsb, "\trep movsb\n",                Use(.r={Rrcx,Rrsi,Rrdi}),       Def())
 Insn(Irepmovsw, "\trep movsw\n",                Use(.r={Rrcx,Rrsi,Rrdi}),       Def())
 Insn(Irepmovsl, "\trep movsl\n",                Use(.r={Rrcx,Rrsi,Rrdi}),       Def())
@@ -32,6 +34,7 @@
 Insn(Iadd,      "\tadd%t %x,%r\n",              Use(.l={1,2}),                  Def(.l={2}))
 Insn(Isub,      "\tsub%t %x,%r\n",              Use(.l={1,2}),                  Def(.l={2}))
 Insn(Iimul,     "\timul%t %x,%r\n",             Use(.l={1,2}),                  Def(.l={2}))
+/* there is no two-operand imul for 8 bit values; only the one-operand form below (al * operand -> ax). */
 Insn(Iimul_r,   "\timul%t %r\n",                Use(.l={1},.r={Ral}),           Def(.r={Rax}))
 Insn(Imul,      "\tmul%t %r\n",                 Use(.l={1},.r={Reax}),          Def(.r={Reax,Redx}))
 Insn(Idiv,      "\tdiv%t %r\n",                 Use(.l={1},.r={Reax,Redx}),     Def(.r={Reax,Redx}))
@@ -59,12 +62,15 @@
 Insn(Isetge,    "\tsetge %v\n",                 Use(),  Def(.l={1}))
 
 /* fp specific instructions */
+Insn(Imovs,      "\tmovs%1t %x,%x\n",           Use(.l={1}),                    Def(.l={2}))
 Insn(Icvttsd2si, "\tcvttsd2si%2t %x,%r\n",      Use(.l={1}),                    Def(.l={2}))
-Insn(Icvttsi2sd, "\tcvttsd2si%2t %x,%r\n",      Use(.l={1}),                    Def(.l={2}))
-Insn(Ifdiv,      "\tdiv%t %x,%r\n",             Use(.l={1},.r={Reax,Redx}),     Def(.r={Reax,Redx}))
-Insn(Ifmul,      "\tmul%t %x,%r\n",             Use(.l={1,2}),                  Def(.l={2}))
-Insn(Ifmov,      "\tmov%t %x,%x\n",             Use(.l={1,2}),                  Def(.l={2}))
-Insn(Icomi,      "\tcomi%t %x,%r\n",            Use(.l={1,2}),                  Def())
+Insn(Icvttsi2sd, "\tcvttsi2sd%2t %x,%f\n",      Use(.l={1}),                    Def(.l={2}))
+Insn(Iadds,      "\tadds%t %x,%f\n",            Use(.l={1,2}),                  Def(.l={2}))
+Insn(Isubs,      "\tsubs%t %x,%f\n",            Use(.l={1,2}),                  Def(.l={2}))
+Insn(Imuls,      "\tmuls%t %x,%f\n",            Use(.l={1,2}),                  Def(.l={2}))
+Insn(Idivs,      "\tdivs%t %x,%f\n",            Use(.l={1,2}),                  Def(.l={2}))
+Insn(Icomis,     "\tcomis%t %x,%f\n",           Use(.l={1,2}),                  Def())
+Insn(Ixorp,      "\txorp%t %x,%f\n",            Use(.l={1,2}),                  Def(.l={2}))
 
 /* branch instructions */
 Insn(Icall,     "\tcall %v\n",                  Use(.l={1}), Def(.r={Rrax}))
--- a/6/isel.c
+++ b/6/isel.c
@@ -2,6 +2,7 @@
 #include <stdio.h>
 #include <stdint.h>
 #include <stdarg.h>
+#include <inttypes.h>
 #include <ctype.h>
 #include <string.h>
 #include <assert.h>
@@ -28,8 +29,8 @@
   [ModeW] = "w",
   [ModeL] = "l",
   [ModeQ] = "q",
-  [ModeF] = "ss",
-  [ModeD] = "sd"
+  [ModeF] = "s",
+  [ModeD] = "d"
 };
 
 /* forward decls */
@@ -45,12 +46,12 @@
     AsmOp getflag;
 } reloptab[Numops] = {
     [Olnot] = {Itest, 0, Ijz, Isetz}, /* lnot invalid for floats */
-    [Oeq] = {Icmp, Icomi, Ijz, Isetz},
-    [One] = {Icmp, Icomi, Ijnz, Isetnz},
-    [Ogt] = {Icmp, Icomi, Ijg, Isetg},
-    [Oge] = {Icmp, Icomi, Ijge, Isetge},
-    [Olt] = {Icmp, Icomi, Ijl, Isetl},
-    [Ole] = {Icmp, Icomi, Ijle, Isetle}
+    [Oeq] = {Icmp, Icomis, Ijz,  Isetz},
+    [One] = {Icmp, Icomis, Ijnz, Isetnz},
+    [Ogt] = {Icmp, Icomis, Ijg,  Isetg},
+    [Oge] = {Icmp, Icomis, Ijge, Isetge},
+    [Olt] = {Icmp, Icomis, Ijl,  Isetl},
+    [Ole] = {Icmp, Icomis, Ijle, Isetle}
 };
 
 static Mode mode(Node *n)
@@ -78,6 +79,16 @@
     return ModeQ;
 }
 
+static int isintmode(Mode m) /* nonzero iff m is one of the integer modes (ModeB, ModeW, ModeL, ModeQ) */
+{
+    return m == ModeB || m == ModeW || m == ModeL || m == ModeQ;
+}
+
+static int isfloatmode(Mode m) /* nonzero iff m is one of the floating point modes (ModeF, ModeD) */
+{
+    return m == ModeF || m == ModeD;
+}
+
 static Loc *loc(Isel *s, Node *n)
 {
     ssize_t stkoff;
@@ -161,7 +172,7 @@
     if (src->mode == dst->mode)
         g(s, Imov, src, dst, NULL);
     else
-        g(s, Imovz, src, dst, NULL);
+        g(s, Imovzx, src, dst, NULL);
 }
 
 static void load(Isel *s, Loc *a, Loc *b)
@@ -173,7 +184,10 @@
         l = locmem(0, b, Rnone, a->mode);
     else
         l = a;
-    g(s, Imov, l, b, NULL);
+    if (isfloatmode(b->mode))
+        g(s, Imovs, l, b, NULL);
+    else
+        g(s, Imov, l, b, NULL);
 }
 
 static void stor(Isel *s, Loc *a, Loc *b)
@@ -185,7 +199,10 @@
         l = locmem(0, b, Rnone, b->mode);
     else
         l = b;
-    g(s, Imov, a, l, NULL);
+    if (isfloatmode(b->mode))
+        g(s, Imovs, a, l, NULL);
+    else
+        g(s, Imov, a, l, NULL);
 }
 
 /* ensures that a location is within a reg */
@@ -457,8 +474,12 @@
     call(s, n->expr.args[0]);
     if (argsz)
         g(s, Iadd, stkbump, rsp, NULL);
-    if (retloc)
-        g(s, Imov, retloc, ret, NULL);
+    if (retloc) {
+        if (isfloatmode(retloc->mode))
+            g(s, Imovs, retloc, ret, NULL);
+        else
+            g(s, Imov, retloc, ret, NULL);
+    }
     return ret;
 }
 
@@ -467,7 +488,6 @@
     Loc *a, *b, *c, *d, *r;
     Loc *eax, *edx, *cl; /* x86 wants some hard-coded regs */
     Node **args;
-    int sz;
 
     args = n->expr.args;
     eax = locphysreg(Reax);
@@ -480,10 +500,8 @@
         case Obor:      r = binop(s, Ior,  args[0], args[1]); break;
         case Oband:     r = binop(s, Iand, args[0], args[1]); break;
         case Obxor:     r = binop(s, Ixor, args[0], args[1]); break;
-        case Omul:      
-            if (floattype(exprtype(n)))
-                r = binop(s, Ifmul, args[0], args[1]);
-            else if (size(args[0]) == 1) {
+        case Omul:
+            if (size(args[0]) == 1) {
                 a = selexpr(s, args[0]);
                 b = selexpr(s, args[1]);
 
@@ -498,53 +516,54 @@
             break;
         case Odiv:
         case Omod:
-            if (floattype(exprtype(n))) {
-                r = binop(s, Ifdiv, args[0], args[1]);
-            } else {
-                /* these get clobbered by the div insn */
-                a = selexpr(s, args[0]);
-                b = selexpr(s, args[1]);
-                b = inr(s, b);
-                c = coreg(Reax, mode(n));
-                r = locreg(a->mode);
-                if (r->mode == ModeB)
-                    g(s, Ixor, eax, eax, NULL);
-                else
-                    g(s, Ixor, edx, edx, NULL);
-                g(s, Imov, a, c, NULL);
-                g(s, Idiv, b, NULL);
-                if (exprop(n) == Odiv)
-                    d = coreg(Reax, mode(n));
-                else if (r->mode != ModeB)
-                    d = coreg(Redx, mode(n));
-                else
-                    d = locphysreg(Rah);
-                g(s, Imov, d, r, NULL);
-            }
+            /* these get clobbered by the div insn */
+            a = selexpr(s, args[0]);
+            b = selexpr(s, args[1]);
+            b = inr(s, b);
+            c = coreg(Reax, mode(n));
+            r = locreg(a->mode);
+            if (r->mode == ModeB)
+                g(s, Ixor, eax, eax, NULL);
+            else
+                g(s, Ixor, edx, edx, NULL);
+            g(s, Imov, a, c, NULL);
+            g(s, Idiv, b, NULL);
+            if (exprop(n) == Odiv)
+                d = coreg(Reax, mode(n));
+            else if (r->mode != ModeB)
+                d = coreg(Redx, mode(n));
+            else
+                d = locphysreg(Rah);
+            g(s, Imov, d, r, NULL);
             break;
         case Oneg:
             r = selexpr(s, args[0]);
             r = inr(s, r);
-            if (floatnode(args[0])) {
-                sz = size(args[0]);
-                a = NULL;
-                b = NULL;
-                if (sz == 4) {
-                    a = locreg(ModeD);
-                    b = loclit(1 << (8*sz-1), ModeD);
-                    g(s, Imov, r, a);
-                } else if (size(args[0]) == 8) {
-                    a = locreg(ModeQ);
-                    b = loclit(1 << (8*sz-1), ModeQ);
-                    g(s, Imov, r, a, NULL);
-                }
-                g(s, Ixor, b, a, NULL);
-                g(s, Imov, a, r, NULL);
-            } else {
-                g(s, Ineg, r, NULL);
-            }
+            g(s, Ineg, r, NULL);
             break;
 
+        /* fp expressions */
+        case Ofadd:      r = binop(s, Iadds, args[0], args[1]); break;
+        case Ofsub:      r = binop(s, Isubs, args[0], args[1]); break;
+        case Ofmul:      r = binop(s, Imuls, args[0], args[1]); break;
+        case Ofdiv:      r = binop(s, Idivs, args[0], args[1]); break;
+        case Ofneg: /* negate a float by xor-ing a mask with only the top bit set */
+            r = selexpr(s, args[0]);
+            r = inr(s, r);
+            a = NULL;
+            b = NULL;
+            if (mode(args[0]) == ModeF) {
+                a = locreg(ModeF);
+                b = loclit(1LL << (31), ModeF); /* top bit of a 32-bit value */
+                g(s, Imovs, r, a, NULL); /* operand list must end in NULL, as in every other g() call */
+            } else if (mode(args[0]) == ModeD) {
+                a = locreg(ModeQ);
+                b = loclit(1ULL << 63, ModeQ); /* unsigned shift: 1LL << 63 overflows a signed long long */
+                g(s, Imov, r, a, NULL);
+            }
+            g(s, Ixor, b, a, NULL);
+            g(s, Imov, a, r, NULL);
+            break;
         case Obsl:
         case Obsr:
             a = inr(s, selexpr(s, args[0]));
@@ -621,7 +640,10 @@
             else
                 a = selexpr(s, args[0]);
             b = inri(s, b);
-            g(s, Imov, b, a, NULL);
+            if (isfloatmode(b->mode))
+                g(s, Imovs, b, a, NULL);
+            else
+                g(s, Imov, b, a, NULL);
             r = b;
             break;
         case Ocall:
@@ -670,17 +692,21 @@
             a = selexpr(s, args[0]);
             a = inr(s, a);
             r = locreg(mode(n));
-            g(s, Imovs, a, r, NULL);
+            g(s, Imovsx, a, r, NULL);
             break;
         case Oint2flt:
             a = selexpr(s, args[0]);
+            b = locreg(ModeQ);
             r = locreg(mode(n));
-            g(s, Icvttsi2sd, a, r, NULL);
+            g(s, Imovs, a, b, NULL);
+            g(s, Icvttsi2sd, b, r, NULL);
             break;
         case Oflt2int:
             a = selexpr(s, args[0]);
+            b = locreg(ModeQ);
             r = locreg(mode(n));
-            g(s, Icvttsi2sd, a, r, NULL);
+            g(s, Icvttsd2si, a, b, NULL);
+            g(s, Imov, b, r, NULL);
             break;
 
         /* These operators should never show up in the reduced trees,
@@ -713,7 +739,11 @@
             fprintf(fd, "%s", l->lbl);
             break;
         case Locreg:
-            assert(spec == 'r' || spec == 'v' || spec == 'x' || spec == 'u');
+            assert((spec == 'r' && isintmode(l->mode)) || 
+                   (spec == 'f' && isfloatmode(l->mode)) ||
+                   spec == 'v' ||
+                   spec == 'x' ||
+                   spec == 'u');
             if (l->reg.colour == Rnone)
                 fprintf(fd, "%%P.%zd", l->reg.id);
             else
@@ -770,7 +800,7 @@
      * we don't know the name of the reg to use, we need to sub it in when
      * writing... */
     switch (insn->op) {
-        case Imovz:
+        case Imovzx:
             if (insn->args[0]->mode == ModeL && insn->args[1]->mode == ModeQ) {
                 if (insn->args[1]->reg.colour) {
                     insn->op = Imov;
@@ -778,7 +808,13 @@
                 }
             }
             break;
+        case Imovs:
+            /* moving a reg to itself is dumb. */
+            if (insn->args[0]->reg.colour == insn->args[1]->reg.colour)
+                return;
+            break;
         case Imov:
+            assert(!isfloatmode(insn->args[1]->mode));
             if (insn->args[0]->type != Locreg || insn->args[1]->type != Locreg)
                 break;
             if (insn->args[0]->reg.colour == Rnone || insn->args[1]->reg.colour == Rnone)
@@ -808,7 +844,8 @@
         switch (*p) {
             case '\0':
                 goto done; /* skip the final p++ */
-            case 'r': /* register */
+            case 'r': /* int register */
+            case 'f': /* float register */
             case 'm': /* memory */
             case 'i': /* imm */
             case 'v': /* reg/mem */
@@ -890,7 +927,7 @@
     if (s->ret) {
         ret = loc(s, s->ret);
         if (floattype(exprtype(s->ret)))
-            g(s, Imov, ret, coreg(Rxmm0d, ret->mode), NULL);
+            g(s, Imovs, ret, coreg(Rxmm0d, ret->mode), NULL);
         else
             g(s, Imov, ret, coreg(Rax, ret->mode), NULL);
     }
@@ -960,6 +997,12 @@
         [4] = ".long",
         [8] = ".quad"
     };
+    union {
+        float fv;
+        double dv;
+        uint64_t qv;
+        uint32_t lv;
+    } u;
 
     assert(v->type == Nlit);
     switch (v->lit.littype) {
@@ -966,7 +1009,15 @@
         case Lint:      fprintf(fd, "\t%s %lld\n", intsz[sz], v->lit.intval);    break;
         case Lbool:     fprintf(fd, "\t.byte %d\n", v->lit.boolval);     break;
         case Lchr:      fprintf(fd, "\t.long %d\n",  v->lit.chrval);     break;
-        case Lflt:      fprintf(fd, "\t.double %f\n", v->lit.fltval);    break;
+        case Lflt:
+                if (tybase(v->lit.type)->type == Tyfloat32) {
+                    u.fv = v->lit.fltval;
+                    fprintf(fd, "\t.long 0x%"PRIx32"\n", u.lv);
+                } else if (tybase(v->lit.type)->type == Tyfloat64) {
+                    u.dv = v->lit.fltval;
+                    fprintf(fd, "\t.quad 0x%"PRIx64"\n", u.qv);
+                }
+                break;
         case Lstr:
            if (hthas(strtab, v->lit.strval)) {
                lbl = htget(strtab, v->lit.strval);
--- a/6/ra.c
+++ b/6/ra.c
@@ -40,23 +40,40 @@
 };
 
 /* A map of which registers interfere */
-Reg regmap[][Nmode] = {
-    [0]  = {Rnone, Ral, Rax, Reax, Rrax, Rxmm0f, Rxmm0d},
-    [1]  = {Rnone, Rcl, Rcx, Recx, Rrcx, Rxmm1f, Rxmm1d},
-    [2]  = {Rnone, Rdl, Rdx, Redx, Rrdx, Rxmm2f, Rxmm2d},
-    [3]  = {Rnone, Rbl, Rbx, Rebx, Rrbx, Rxmm3f, Rxmm3d},
-    [4]  = {Rnone, Rsil, Rsi, Resi, Rrsi, Rxmm4f, Rxmm4d},
-    [5]  = {Rnone, Rdil, Rdi, Redi, Rrdi, Rxmm5f, Rxmm5d},
-    [6]  = {Rnone, Rr8b, Rr8w, Rr8d, Rr8, Rxmm6f, Rxmm6d},
-    [7]  = {Rnone, Rr9b, Rr9w, Rr9d, Rr9, Rxmm7f, Rxmm7d},
-    [8]  = {Rnone, Rr10b, Rr10w, Rr10d, Rr10, Rxmm8f, Rxmm8d},
-    [9]  = {Rnone, Rr11b, Rr11w, Rr11d, Rr11, Rxmm9f, Rxmm9d},
-    [10]  = {Rnone, Rr12b, Rr12w, Rr12d, Rr12, Rxmm10f, Rxmm10d},
-    [11]  = {Rnone, Rr13b, Rr13w, Rr13d, Rr13, Rxmm11f, Rxmm11d},
-    [12]  = {Rnone, Rr14b, Rr14w, Rr14d, Rr14, Rxmm12f, Rxmm12d},
-    [13]  = {Rnone, Rr15b, Rr15w, Rr15d, Rr15, Rxmm13f, Rxmm13d},
-    [14]  = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm14f, Rxmm14d},
-    [15]  = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm15f, Rxmm15d},
+#define Northogonal 32
+Reg regmap[Northogonal][Nmode] = {
+    [0]  = {Rnone, Ral, Rax, Reax, Rrax},
+    [1]  = {Rnone, Rcl, Rcx, Recx, Rrcx},
+    [2]  = {Rnone, Rdl, Rdx, Redx, Rrdx},
+    [3]  = {Rnone, Rbl, Rbx, Rebx, Rrbx},
+    [4]  = {Rnone, Rsil, Rsi, Resi, Rrsi},
+    [5]  = {Rnone, Rdil, Rdi, Redi, Rrdi},
+    [6]  = {Rnone, Rr8b, Rr8w, Rr8d, Rr8},
+    [7]  = {Rnone, Rr9b, Rr9w, Rr9d, Rr9},
+    [8]  = {Rnone, Rr10b, Rr10w, Rr10d, Rr10},
+    [9]  = {Rnone, Rr11b, Rr11w, Rr11d, Rr11},
+    [10]  = {Rnone, Rr12b, Rr12w, Rr12d, Rr12},
+    [11]  = {Rnone, Rr13b, Rr13w, Rr13d, Rr13},
+    [12]  = {Rnone, Rr14b, Rr14w, Rr14d, Rr14},
+    [13]  = {Rnone, Rr15b, Rr15w, Rr15d, Rr15},
+    [14]  = {Rnone, Rnone, Rnone, Rnone, Rnone},
+    [15]  = {Rnone, Rnone, Rnone, Rnone, Rnone},
+    [16] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm0f, Rxmm0d},
+    [17] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm1f, Rxmm1d},
+    [18] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm2f, Rxmm2d},
+    [19] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm3f, Rxmm3d},
+    [20] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm4f, Rxmm4d},
+    [21] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm5f, Rxmm5d},
+    [22] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm6f, Rxmm6d},
+    [23] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm7f, Rxmm7d},
+    [24] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm8f, Rxmm8d},
+    [25] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm9f, Rxmm9d},
+    [26] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm10f, Rxmm10d},
+    [27] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm11f, Rxmm11d},
+    [28] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm12f, Rxmm12d},
+    [29] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm13f, Rxmm13d},
+    [30] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm14f, Rxmm14d},
+    [31] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm15f, Rxmm15d},
 };
 
 /* Which regmap entry a register maps to */
@@ -78,16 +95,16 @@
     [Rr15b]   = 13, [Rr15w] = 13, [Rr15d] = 13, [Rr15]  = 13,
 
     /* float */
-    [Rxmm0f] = 16,  [Rxmm0d] = 16,
-    [Rxmm1f] = 17,  [Rxmm1d] = 17,
-    [Rxmm2f] = 18,  [Rxmm2d] = 18,
-    [Rxmm3f] = 19,  [Rxmm3d] = 19,
-    [Rxmm4f] = 20,  [Rxmm4d] = 20,
-    [Rxmm5f] = 21,  [Rxmm5d] = 21,
-    [Rxmm6f] = 22,  [Rxmm6d] = 22,
-    [Rxmm7f] = 23,  [Rxmm7d] = 23,
-    [Rxmm8f] = 24,  [Rxmm8d] = 24,
-    [Rxmm9f] = 25,  [Rxmm9d] = 25,
+    [Rxmm0f]  = 16,  [Rxmm0d]  = 16,
+    [Rxmm1f]  = 17,  [Rxmm1d]  = 17,
+    [Rxmm2f]  = 18,  [Rxmm2d]  = 18,
+    [Rxmm3f]  = 19,  [Rxmm3d]  = 19,
+    [Rxmm4f]  = 20,  [Rxmm4d]  = 20,
+    [Rxmm5f]  = 21,  [Rxmm5d]  = 21,
+    [Rxmm6f]  = 22,  [Rxmm6d]  = 22,
+    [Rxmm7f]  = 23,  [Rxmm7d]  = 23,
+    [Rxmm8f]  = 24,  [Rxmm8d]  = 24,
+    [Rxmm9f]  = 25,  [Rxmm9d]  = 25,
     [Rxmm10f] = 26,  [Rxmm10d] = 26,
     [Rxmm11f] = 27,  [Rxmm11d] = 27,
     [Rxmm12f] = 28,  [Rxmm12d] = 28,
@@ -848,8 +865,8 @@
         }
 
         found = 0;
-        for (i = 0; i < _K[rclass(n)]; i++) {
-            if (!taken[i]) {
+        for (i = 0; i < Northogonal; i++) {
+            if (regmap[i][n->mode] && !taken[i]) {
                 if (debugopt['r']) {
                     fprintf(stdout, "\tselecting ");
                     locprint(stdout, n, 'x');
--- a/6/simp.c
+++ b/6/simp.c
@@ -1387,7 +1387,29 @@
             else
                 r = t->expr.args[0];
             break;
+        case Oneg:
+            if (istyfloat(exprtype(n))) {
+                t =mkfloat(n->line, -1.0); 
+                u = mkexpr(n->line, Olit, t, NULL);
+                t->lit.type = n->expr.type;
+                u->expr.type = n->expr.type;
+                v = simplit(s, u, &s->blobs, &s->nblobs);
+                r = mkexpr(n->line, Ofmul, v, args[0], NULL);
+                r->expr.type = n->expr.type;
+            } else {
+                r = visit(s, n);
+            }
+            break;
         default:
+            if (istyfloat(exprtype(n))) {
+                switch (exprop(n)) {
+                    case Oadd: n->expr.op = Ofadd; break;
+                    case Osub: n->expr.op = Ofsub; break;
+                    case Omul: n->expr.op = Ofmul; break;
+                    case Odiv: n->expr.op = Ofdiv; break;
+                    default: break;
+                }
+            }
             r = visit(s, n);
     }
     return r;
--- a/parse/infer.c
+++ b/parse/infer.c
@@ -1,6 +1,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdint.h>
+#include <inttypes.h>
 #include <ctype.h>
 #include <string.h>
 #include <assert.h>
@@ -1201,6 +1202,7 @@
         case Oblit: case Numops:
         case Otrunc: case Oswiden: case Ozwiden:
         case Oint2flt: case Oflt2int:
+        case Ofadd: case Ofsub: case Ofmul: case Ofdiv: case Ofneg:
         case Ouget:
             die("Should not see %s in fe", opstr(exprop(n)));
             break;
@@ -1258,12 +1260,10 @@
 
 static void infernode(Inferstate *st, Node *n, Type *ret, int *sawret)
 {
-    size_t i;
-    Node *d;
-    Node *s;
-    Type *t;
-    size_t nbound;
+    size_t i, nbound;
     Node **bound;
+    Node *d, *s;
+    Type *t;
 
     if (!n)
         return;
@@ -1329,6 +1329,7 @@
             infernode(st, n->iterstmt.body, ret, sawret);
 
             t = mktyidxhack(n->line, mktyvar(n->line));
+            constrain(st, n, type(st, n->iterstmt.seq), cstrtab[Tcidx]);
             unify(st, n, type(st, n->iterstmt.seq), t);
             unify(st, n, type(st, n->iterstmt.elt), t->sub[0]);
             break;
--- a/parse/ops.def
+++ b/parse/ops.def
@@ -65,3 +65,8 @@
 O(Oswiden, 1)      /* sign-extending widening cast */
 O(Oflt2int, 1)     /* float to int conversion */
 O(Oint2flt, 1)     /* int to float conversion */
+O(Ofadd, 1)        /* floating point add */
+O(Ofsub, 1)        /* floating point subtract */
+O(Ofmul, 1)        /* floating point multiply */
+O(Ofdiv, 1)        /* floating point divide */
+O(Ofneg, 1)        /* floating point negate */
--- a/parse/parse.h
+++ b/parse/parse.h
@@ -384,6 +384,7 @@
 Cstr *mkcstr(int line, char *name, Node **memb, size_t nmemb, Node **funcs, size_t nfuncs);
 Type *mktylike(int line, Ty ty); /* constrains tyvar t like it was builtin ty */
 int   istysigned(Type *t);
+int   istyfloat(Type *t);
 int   isgeneric(Type *t);
 int   hasparams(Type *t);
 
--- a/parse/type.c
+++ b/parse/type.c
@@ -258,9 +258,19 @@
 
 int istysigned(Type *t)
 {
-    switch (t->type) {
+    switch (tybase(t)->type) {
         case Tyint8: case Tyint16: case Tyint:
         case Tyint32: case Tyint64: case Tylong:
+            return 1;
+        default:
+            return 0;
+    }
+}
+
+int istyfloat(Type *t)
+{
+    switch (tybase(t)->type) {
+        case Tyfloat32: case Tyfloat64:
             return 1;
         default:
             return 0;
--- a/test/sqrt.myr
+++ b/test/sqrt.myr
@@ -18,12 +18,13 @@
 
 	val = 1.0;
 	for i = 0; i < Maxiter; i++
-		iter = 0.5*(val - x/val)
+		iter = 0.5*(val + x/val)
 		if abs(val - iter) < Eps
 			-> val;
-			val = iter;
 		;;
+		val = iter;
 	;;
+	-> val
 }
 
 const main = {