소스 검색

merge with trunk

git-svn-id: branches/tg74/avx2@28412 -
tg74 11 년 전
부모
커밋
996b1c6bcf

+ 3 - 0
.gitattributes

@@ -765,6 +765,7 @@ compiler/x86/cpubase.pas svneol=native#text/plain
 compiler/x86/hlcgx86.pas svneol=native#text/plain
 compiler/x86/hlcgx86.pas svneol=native#text/plain
 compiler/x86/itcpugas.pas svneol=native#text/plain
 compiler/x86/itcpugas.pas svneol=native#text/plain
 compiler/x86/itx86int.pas svneol=native#text/plain
 compiler/x86/itx86int.pas svneol=native#text/plain
+compiler/x86/ni86mem.pas svneol=native#text/plain
 compiler/x86/nx86add.pas svneol=native#text/plain
 compiler/x86/nx86add.pas svneol=native#text/plain
 compiler/x86/nx86cal.pas svneol=native#text/plain
 compiler/x86/nx86cal.pas svneol=native#text/plain
 compiler/x86/nx86cnv.pas svneol=native#text/plain
 compiler/x86/nx86cnv.pas svneol=native#text/plain
@@ -777,6 +778,8 @@ compiler/x86/rax86.pas svneol=native#text/plain
 compiler/x86/rax86att.pas svneol=native#text/plain
 compiler/x86/rax86att.pas svneol=native#text/plain
 compiler/x86/rax86int.pas svneol=native#text/plain
 compiler/x86/rax86int.pas svneol=native#text/plain
 compiler/x86/rgx86.pas svneol=native#text/plain
 compiler/x86/rgx86.pas svneol=native#text/plain
+compiler/x86/symi86.pas svneol=native#text/plain
+compiler/x86/symx86.pas svneol=native#text/plain
 compiler/x86/x86ins.dat svneol=native#text/plain
 compiler/x86/x86ins.dat svneol=native#text/plain
 compiler/x86/x86reg.dat svneol=native#text/plain
 compiler/x86/x86reg.dat svneol=native#text/plain
 compiler/x86_64/aoptcpu.pas svneol=native#text/plain
 compiler/x86_64/aoptcpu.pas svneol=native#text/plain

+ 396 - 115
compiler/x86/aasmcpu.pas

@@ -151,10 +151,17 @@ interface
       { register class 5: XMM (both reg and r/m) }
       { register class 5: XMM (both reg and r/m) }
       OT_XMMREG    = OT_REGNORM or otf_reg_xmm;
       OT_XMMREG    = OT_REGNORM or otf_reg_xmm;
       OT_XMMRM     = OT_REGMEM or otf_reg_xmm;
       OT_XMMRM     = OT_REGMEM or otf_reg_xmm;
+      OT_XMEM32    = OT_REGNORM or otf_reg_xmm or otf_reg_gpr or OT_BITS32;
+      OT_XMEM64    = OT_REGNORM or otf_reg_xmm or otf_reg_gpr or OT_BITS64;
 
 
       { register class 5: YMM (both reg and r/m) }
       { register class 5: YMM (both reg and r/m) }
       OT_YMMREG    = OT_REGNORM or otf_reg_ymm;
       OT_YMMREG    = OT_REGNORM or otf_reg_ymm;
       OT_YMMRM     = OT_REGMEM or otf_reg_ymm;
       OT_YMMRM     = OT_REGMEM or otf_reg_ymm;
+      OT_YMEM32    = OT_REGNORM or otf_reg_ymm or otf_reg_gpr or OT_BITS32;
+      OT_YMEM64    = OT_REGNORM or otf_reg_ymm or otf_reg_gpr or OT_BITS64;
+
+      { Vector-Memory operands }
+      OT_VMEM_ANY  = OT_XMEM32 or OT_XMEM64 or OT_YMEM32 or OT_YMEM64;
 
 
       { Memory operands }
       { Memory operands }
       OT_MEM8      = OT_MEMORY or OT_BITS8;
       OT_MEM8      = OT_MEMORY or OT_BITS8;
@@ -228,7 +235,9 @@ interface
                          msiMultiple, msiMultiple8, msiMultiple16, msiMultiple32,
                          msiMultiple, msiMultiple8, msiMultiple16, msiMultiple32,
                          msiMultiple64, msiMultiple128, msiMultiple256,
                          msiMultiple64, msiMultiple128, msiMultiple256,
                          msiMemRegSize, msiMemRegx16y32, msiMemRegx32y64, msiMemRegx64y128, msiMemRegx64y256,
                          msiMemRegSize, msiMemRegx16y32, msiMemRegx32y64, msiMemRegx64y128, msiMemRegx64y256,
-                         msiMem8, msiMem16, msiMem32, msiMem64, msiMem128, msiMem256);
+                         msiMem8, msiMem16, msiMem32, msiMem64, msiMem128, msiMem256,
+                         msiXMem32, msiXMem64, msiYMem32, msiYMem64,
+                         msiVMemMultiple, msiVMemRegSize);
 
 
       TConstSizeInfo  = (csiUnkown, csiMultiple, csiNoSize, csiMem8, csiMem16, csiMem32, csiMem64);
       TConstSizeInfo  = (csiUnkown, csiMultiple, csiNoSize, csiMem8, csiMem16, csiMem32, csiMem64);
 
 
@@ -242,8 +251,10 @@ interface
       MemRefMultiples: set of TMemRefSizeInfo = [msiMultiple, msiMultiple8,
       MemRefMultiples: set of TMemRefSizeInfo = [msiMultiple, msiMultiple8,
                                                  msiMultiple16, msiMultiple32,
                                                  msiMultiple16, msiMultiple32,
                                                  msiMultiple64, msiMultiple128,
                                                  msiMultiple64, msiMultiple128,
-                                                 msiMultiple256];
+                                                 msiMultiple256, msiVMemMultiple];
 
 
+      MemRefSizeInfoVMems: Set of TMemRefSizeInfo = [msiXMem32, msiXMem64, msiYMem32, msiYMem64,
+                                                     msiVMemMultiple, msiVMemRegSize];
 
 
       InsProp : array[tasmop] of TInsProp =
       InsProp : array[tasmop] of TInsProp =
 {$if defined(x86_64)}
 {$if defined(x86_64)}
@@ -428,10 +439,13 @@ implementation
        IF_SSE42  = $00200000;
        IF_SSE42  = $00200000;
        IF_AVX    = $00200000;
        IF_AVX    = $00200000;
        IF_AVX2   = $00200000;
        IF_AVX2   = $00200000;
-       IF_SANDYBRIDGE = $00200000;
-       IF_BMI1 = $00200000;
-       IF_BMI2 = $00200000;
+       IF_BMI1   = $00200000;
+       IF_BMI2   = $00200000;
+       IF_16BITONLY = $00200000;
+       IF_FMA    = $00200000;
+       IF_FMA4   = $00200000;
 
 
+       IF_PLEVEL = $0F000000;  { mask for processor level }
        IF_8086   = $00000000;  { 8086 instruction  }
        IF_8086   = $00000000;  { 8086 instruction  }
        IF_186    = $01000000;  { 186+ instruction  }
        IF_186    = $01000000;  { 186+ instruction  }
        IF_286    = $02000000;  { 286+ instruction  }
        IF_286    = $02000000;  { 286+ instruction  }
@@ -440,14 +454,14 @@ implementation
        IF_PENT   = $05000000;  { Pentium instruction  }
        IF_PENT   = $05000000;  { Pentium instruction  }
        IF_P6     = $06000000;  { P6 instruction  }
        IF_P6     = $06000000;  { P6 instruction  }
        IF_KATMAI = $07000000;  { Katmai instructions  }
        IF_KATMAI = $07000000;  { Katmai instructions  }
-       { Willamette instructions }
-       IF_WILLAMETTE = $08000000;
-       { Prescott instructions }
-       IF_PRESCOTT = $09000000;
+       IF_WILLAMETTE = $08000000; { Willamette instructions }
+       IF_PRESCOTT   = $09000000; { Prescott instructions }
        IF_X86_64 = $0a000000;
        IF_X86_64 = $0a000000;
        IF_CYRIX  = $0b000000;  { Cyrix-specific instruction  }
        IF_CYRIX  = $0b000000;  { Cyrix-specific instruction  }
        IF_AMD    = $0c000000;  { AMD-specific instruction  }
        IF_AMD    = $0c000000;  { AMD-specific instruction  }
        IF_CENTAUR = $0d000000;  { centaur-specific instruction  }
        IF_CENTAUR = $0d000000;  { centaur-specific instruction  }
+       IF_SANDYBRIDGE = $0e000000; { Sandybridge-specific instruction }
+       IF_NEC    = $0f000000;  { NEC V20/V30 instruction }
        { added flags }
        { added flags }
        IF_PRE    = $40000000;  { it's a prefix instruction }
        IF_PRE    = $40000000;  { it's a prefix instruction }
        IF_PASS2  = $80000000;  { if the instruction can change in a second pass }
        IF_PASS2  = $80000000;  { if the instruction can change in a second pass }
@@ -1136,7 +1150,22 @@ implementation
                      then
                      then
                     begin
                     begin
                       { create ot field }
                       { create ot field }
-                      if (ot and OT_SIZE_MASK)=0 then
+                      if (reg_ot_table[findreg_by_number(ref^.base)] and OT_REG_GPR = OT_REG_GPR) and
+                         ((reg_ot_table[findreg_by_number(ref^.index)] = OT_XMMREG) or
+                          (reg_ot_table[findreg_by_number(ref^.index)] = OT_YMMREG)
+                         ) then
+                        // AVX2 - vector memory reference (e.g. vgatherdpd xmm0, [rax + xmm1], xmm2)
+                        ot := (reg_ot_table[findreg_by_number(ref^.base)] and OT_REG_GPR) or
+                              (reg_ot_table[findreg_by_number(ref^.index)])
+                      else if (ref^.base = NR_NO) and
+                              ((reg_ot_table[findreg_by_number(ref^.index)] = OT_XMMREG) or
+                               (reg_ot_table[findreg_by_number(ref^.index)] = OT_YMMREG)
+                              ) then
+                        // AVX2 - vector memory reference without a base register (e.g. vgatherdpd xmm0, [xmm1], xmm2)
+                        ot := (OT_REG_GPR) or
+                              (reg_ot_table[findreg_by_number(ref^.index)])
+
+                      else if (ot and OT_SIZE_MASK)=0 then
                         ot:=OT_MEMORY_ANY or opsize_2_type[i,opsize]
                         ot:=OT_MEMORY_ANY or opsize_2_type[i,opsize]
                       else
                       else
                         ot:=OT_MEMORY_ANY or (ot and OT_SIZE_MASK);
                         ot:=OT_MEMORY_ANY or (ot and OT_SIZE_MASK);
@@ -1215,7 +1244,11 @@ implementation
                   begin
                   begin
                     { allow 2nd, 3rd or 4th operand being a constant and expect no size for shuf* etc. }
                     { allow 2nd, 3rd or 4th operand being a constant and expect no size for shuf* etc. }
                     { further, allow AAD and AAM with imm. operand }
                     { further, allow AAD and AAM with imm. operand }
-                    if (opsize=S_NO) and not((i in [1,2,3]) or ((i=0) and (opcode in [A_AAD,A_AAM]))) then
+                    if (opsize=S_NO) and not((i in [1,2,3]) 
+{$ifndef x86_64}
+                      or ((i=0) and (opcode in [A_AAD,A_AAM]))
+{$endif x86_64}
+                      ) then
                       message(asmr_e_invalid_opcode_and_operand);
                       message(asmr_e_invalid_opcode_and_operand);
                     if (opsize<>S_W) and (aint(val)>=-128) and (val<=127) then
                     if (opsize<>S_W) and (aint(val)>=-128) and (val<=127) then
                       ot:=OT_IMM8 or OT_SIGNED
                       ot:=OT_IMM8 or OT_SIGNED
@@ -1282,6 +1315,7 @@ implementation
          begin
          begin
            insot:=p^.optypes[i];
            insot:=p^.optypes[i];
            currot:=oper[i]^.ot;
            currot:=oper[i]^.ot;
+
            { Check the operand flags }
            { Check the operand flags }
            if (insot and (not currot) and OT_NON_SIZE)<>0 then
            if (insot and (not currot) and OT_NON_SIZE)<>0 then
              exit;
              exit;
@@ -1509,9 +1543,9 @@ implementation
       end;
       end;
 
 
     const
     const
-      segprefixes: array[NR_CS..NR_GS] of Byte=(
-      //cs   ds   es   ss   fs   gs
-        $2E, $3E, $26, $36, $64, $65
+      segprefixes: array[NR_ES..NR_GS] of Byte=(
+      // es  cs   ss   ds   fs   gs
+        $26, $2E, $36, $3E, $64, $65
       );
       );
 
 
     procedure taicpu.Pass2(objdata:TObjData);
     procedure taicpu.Pass2(objdata:TObjData);
@@ -1521,7 +1555,7 @@ implementation
          exit;
          exit;
         current_filepos:=fileinfo;
         current_filepos:=fileinfo;
         { Segment override }
         { Segment override }
-        if (segprefix>=NR_CS) and (segprefix<=NR_GS) then
+        if (segprefix>=NR_ES) and (segprefix<=NR_GS) then
          begin
          begin
            objdata.writebytes(segprefixes[segprefix],1);
            objdata.writebytes(segprefixes[segprefix],1);
            { fix the offset for GenNode }
            { fix the offset for GenNode }
@@ -1554,31 +1588,39 @@ implementation
       end;
       end;
 
 
 
 
+    procedure badreg(r:Tregister);
+      begin
+        Message1(asmw_e_invalid_register,generic_regname(r));
+      end;
+
+
     function regval(r:Tregister):byte;
     function regval(r:Tregister):byte;
       const
       const
-    {$if defined(x86_64)}
-        opcode_table:array[tregisterindex] of tregisterindex = (
-          {$i r8664op.inc}
-        );
-    {$elseif defined(i386)}
-        opcode_table:array[tregisterindex] of tregisterindex = (
-          {$i r386op.inc}
-        );
-    {$elseif defined(i8086)}
-        opcode_table:array[tregisterindex] of tregisterindex = (
-          {$i r8086op.inc}
-        );
-    {$endif}
+        intsupreg2opcode: array[0..7] of byte=
+        // ax cx dx bx si di bp sp   -- in x86reg.dat
+        // ax cx dx bx sp bp si di   -- needed order
+          (0, 1, 2, 3, 6, 7, 5, 4);
+        maxsupreg: array[tregistertype] of tsuperregister=
+{$ifdef x86_64}
+          (0, 16, 9, 8, 16, 32, 0);
+{$else x86_64}
+          (0,  8, 9, 8,  8, 32, 0);
+{$endif x86_64}
       var
       var
-        regidx : tregisterindex;
+        rs: tsuperregister;
+        rt: tregistertype;
       begin
       begin
-        regidx:=findreg_by_number(r);
-        if regidx<>0 then
-          result:=opcode_table[regidx]
-        else
+        rs:=getsupreg(r);
+        rt:=getregtype(r);
+        if (rs>=maxsupreg[rt]) then
+          badreg(r);
+        result:=rs and 7;
+        if (rt=R_INTREGISTER) then
           begin
           begin
-            Message1(asmw_e_invalid_register,generic_regname(r));
-            result:=0;
+            if (rs<8) then
+              result:=intsupreg2opcode[rs];
+            if getsubreg(r)=R_SUBH then
+              inc(result,4);
           end;
           end;
       end;
       end;
 
 
@@ -1638,7 +1680,11 @@ implementation
         s:=input.ref^.scalefactor;
         s:=input.ref^.scalefactor;
         o:=input.ref^.offset;
         o:=input.ref^.offset;
         sym:=input.ref^.symbol;
         sym:=input.ref^.symbol;
-        if ((ir<>NR_NO) and (getregtype(ir)<>R_INTREGISTER)) or
+
+        //if ((ir<>NR_NO) and (getregtype(ir)<>R_INTREGISTER)) or
+        //   ((br<>NR_NO) and (br<>NR_RIP) and (getregtype(br)<>R_INTREGISTER)) then
+        if ((ir<>NR_NO) and (getregtype(ir)=R_MMREGISTER) and (br<>NR_NO) and (getregtype(br)<>R_INTREGISTER)) or // vector memory (AVX2)
+           ((ir<>NR_NO) and (getregtype(ir)<>R_INTREGISTER) and (getregtype(ir)<>R_MMREGISTER)) or
            ((br<>NR_NO) and (br<>NR_RIP) and (getregtype(br)<>R_INTREGISTER)) then
            ((br<>NR_NO) and (br<>NR_RIP) and (getregtype(br)<>R_INTREGISTER)) then
           internalerror(200301081);
           internalerror(200301081);
         { it's direct address }
         { it's direct address }
@@ -1659,10 +1705,20 @@ implementation
         else
         else
         { it's an indirection }
         { it's an indirection }
          begin
          begin
-           { 16 bit or 32 bit address? }
-           if ((ir<>NR_NO) and (isub<>R_SUBADDR)) or
-              ((br<>NR_NO) and (bsub<>R_SUBADDR)) then
+           { 16 bit? }
+
+           if ((ir<>NR_NO) and (isub in [R_SUBMMX,R_SUBMMY]) and
+               (br<>NR_NO) and (bsub=R_SUBADDR)
+              ) then
+           begin
+             // vector memory (AVX2) =>> ignore
+           end
+           else if ((ir<>NR_NO) and (isub<>R_SUBADDR) and (isub<>R_SUBD)) or
+                   ((br<>NR_NO) and (bsub<>R_SUBADDR) and (bsub<>R_SUBD)) then
+           begin
              message(asmw_e_16bit_32bit_not_supported);
              message(asmw_e_16bit_32bit_not_supported);
+           end;
+
            { wrong, for various reasons }
            { wrong, for various reasons }
            if (ir=NR_ESP) or ((s<>1) and (s<>2) and (s<>4) and (s<>8) and (ir<>NR_NO)) then
            if (ir=NR_ESP) or ((s<>1) and (s<>2) and (s<>4) and (s<>8) and (ir<>NR_NO)) then
             exit;
             exit;
@@ -1673,21 +1729,37 @@ implementation
 
 
            { base }
            { base }
            case br of
            case br of
+             NR_R8D,
+             NR_EAX,
              NR_R8,
              NR_R8,
              NR_RAX : base:=0;
              NR_RAX : base:=0;
+             NR_R9D,
+             NR_ECX,
              NR_R9,
              NR_R9,
              NR_RCX : base:=1;
              NR_RCX : base:=1;
+             NR_R10D,
+             NR_EDX,
              NR_R10,
              NR_R10,
              NR_RDX : base:=2;
              NR_RDX : base:=2;
+             NR_R11D,
+             NR_EBX,
              NR_R11,
              NR_R11,
              NR_RBX : base:=3;
              NR_RBX : base:=3;
+             NR_R12D,
+             NR_ESP,
              NR_R12,
              NR_R12,
              NR_RSP : base:=4;
              NR_RSP : base:=4;
+             NR_R13D,
+             NR_EBP,
              NR_R13,
              NR_R13,
              NR_NO,
              NR_NO,
              NR_RBP : base:=5;
              NR_RBP : base:=5;
+             NR_R14D,
+             NR_ESI,
              NR_R14,
              NR_R14,
              NR_RSI : base:=6;
              NR_RSI : base:=6;
+             NR_R15D,
+             NR_EDI,
              NR_R15,
              NR_R15,
              NR_RDI : base:=7;
              NR_RDI : base:=7;
            else
            else
@@ -1695,22 +1767,70 @@ implementation
            end;
            end;
            { index }
            { index }
            case ir of
            case ir of
+             NR_R8D,
+             NR_EAX,
              NR_R8,
              NR_R8,
-             NR_RAX : index:=0;
+             NR_RAX,
+             NR_XMM0,
+             NR_XMM8,
+             NR_YMM0,
+             NR_YMM8  : index:=0;
+             NR_R9D,
+             NR_ECX,
              NR_R9,
              NR_R9,
-             NR_RCX : index:=1;
+             NR_RCX,
+             NR_XMM1,
+             NR_XMM9,
+             NR_YMM1,
+             NR_YMM9  : index:=1;
+             NR_R10D,
+             NR_EDX,
              NR_R10,
              NR_R10,
-             NR_RDX : index:=2;
+             NR_RDX,
+             NR_XMM2,
+             NR_XMM10,
+             NR_YMM2,
+             NR_YMM10 : index:=2;
+             NR_R11D,
+             NR_EBX,
              NR_R11,
              NR_R11,
-             NR_RBX : index:=3;
+             NR_RBX,
+             NR_XMM3,
+             NR_XMM11,
+             NR_YMM3,
+             NR_YMM11 : index:=3;
+             NR_R12D,
+             NR_ESP,
              NR_R12,
              NR_R12,
-             NR_NO  : index:=4;
+             NR_NO,
+             NR_XMM4,
+             NR_XMM12,
+             NR_YMM4,
+             NR_YMM12 : index:=4;
+             NR_R13D,
+             NR_EBP,
              NR_R13,
              NR_R13,
-             NR_RBP : index:=5;
+             NR_RBP,
+             NR_XMM5,
+             NR_XMM13,
+             NR_YMM5,
+             NR_YMM13: index:=5;
+             NR_R14D,
+             NR_ESI,
              NR_R14,
              NR_R14,
-             NR_RSI : index:=6;
+             NR_RSI,
+             NR_XMM6,
+             NR_XMM14,
+             NR_YMM6,
+             NR_YMM14: index:=6;
+             NR_R15D,
+             NR_EDI,
              NR_R15,
              NR_R15,
-             NR_RDI : index:=7;
+             NR_RDI,
+             NR_XMM7,
+             NR_XMM15,
+             NR_YMM7,
+             NR_YMM15: index:=7;
            else
            else
              exit;
              exit;
            end;
            end;
@@ -1725,7 +1845,7 @@ implementation
            end;
            end;
            { If rbp or r13 is used we must always include an offset }
            { If rbp or r13 is used we must always include an offset }
            if (br=NR_NO) or
            if (br=NR_NO) or
-              ((br<>NR_RBP) and (br<>NR_R13) and (o=0) and (sym=nil)) then
+              ((br<>NR_RBP) and (br<>NR_R13) and (br<>NR_EBP) and (br<>NR_R13D) and (o=0) and (sym=nil)) then
             md:=0
             md:=0
            else
            else
             if ((o>=-128) and (o<=127) and (sym=nil)) then
             if ((o>=-128) and (o<=127) and (sym=nil)) then
@@ -1737,7 +1857,7 @@ implementation
            else
            else
             output.bytes:=md;
             output.bytes:=md;
            { SIB needed ? }
            { SIB needed ? }
-           if (ir=NR_NO) and (br<>NR_RSP) and (br<>NR_R12) then
+           if (ir=NR_NO) and (br<>NR_RSP) and (br<>NR_R12) and (br<>NR_ESP) and (br<>NR_R12D)  then
             begin
             begin
               output.sib_present:=false;
               output.sib_present:=false;
               output.modrm:=(md shl 6) or (rfield shl 3) or base;
               output.modrm:=(md shl 6) or (rfield shl 3) or base;
@@ -1905,7 +2025,7 @@ implementation
         len     : shortint;
         len     : shortint;
         ea_data : ea;
         ea_data : ea;
         exists_vex: boolean;
         exists_vex: boolean;
-        exists_vex_extention: boolean;
+        exists_vex_extension: boolean;
         exists_prefix_66: boolean;
         exists_prefix_66: boolean;
         exists_prefix_F2: boolean;
         exists_prefix_F2: boolean;
         exists_prefix_F3: boolean;
         exists_prefix_F3: boolean;
@@ -1916,7 +2036,7 @@ implementation
         len:=0;
         len:=0;
         codes:=@p^.code[0];
         codes:=@p^.code[0];
         exists_vex := false;
         exists_vex := false;
-        exists_vex_extention := false;
+        exists_vex_extension := false;
         exists_prefix_66 := false;
         exists_prefix_66 := false;
         exists_prefix_F2 := false;
         exists_prefix_F2 := false;
         exists_prefix_F3 := false;
         exists_prefix_F3 := false;
@@ -2072,26 +2192,34 @@ implementation
             243: // REX.W = 1
             243: // REX.W = 1
                  // =>> VEX prefix length = 3
                  // =>> VEX prefix length = 3
               begin
               begin
-                if not(exists_vex_extention) then
+                if not(exists_vex_extension) then
                 begin
                 begin
                   inc(len);
                   inc(len);
-                  exists_vex_extention := true;
+                  exists_vex_extension := true;
                 end;
                 end;
               end;
               end;
             244: ; // VEX length bit
             244: ; // VEX length bit
+            246, // operand 2 (ymmreg) encoded immediate byte (bit 4-7)
             247: inc(len); // operand 3 (ymmreg) encoded immediate byte (bit 4-7)
             247: inc(len); // operand 3 (ymmreg) encoded immediate byte (bit 4-7)
-            248: // VEX-Extention prefix $0F
+            248: // VEX-Extension prefix $0F
                  // ignore for calculating length
                  // ignore for calculating length
                  ;
                  ;
-            249, // VEX-Extention prefix $0F38
-            250: // VEX-Extention prefix $0F3A
+            249, // VEX-Extension prefix $0F38
+            250: // VEX-Extension prefix $0F3A
               begin
               begin
-                if not(exists_vex_extention) then
+                if not(exists_vex_extension) then
                 begin
                 begin
                   inc(len);
                   inc(len);
-                  exists_vex_extention := true;
+                  exists_vex_extension := true;
                 end;
                 end;
               end;
               end;
+            192,193,194:
+              begin
+{$ifdef x86_64}
+                if (oper[c and 3]^.ot and OT_SIZE_MASK)=OT_BITS32 then
+                  inc(len);
+{$endif x86_64}
+              end;
             else
             else
              InternalError(200603141);
              InternalError(200603141);
           end;
           end;
@@ -2120,8 +2248,8 @@ implementation
           if exists_prefix_F3 then dec(len);
           if exists_prefix_F3 then dec(len);
 
 
   {$ifdef x86_64}
   {$ifdef x86_64}
-          if not(exists_vex_extention) then
-            if rex and $0B <> 0 then inc(len);  // REX.WXB <> 0 =>> needed VEX-Extention
+          if not(exists_vex_extension) then
+            if rex and $0B <> 0 then inc(len);  // REX.WXB <> 0 =>> needed VEX-Extension
   {$endif x86_64}
   {$endif x86_64}
 
 
         end;
         end;
@@ -2188,6 +2316,7 @@ implementation
        * \362          - VEX prefix for AVX instructions
        * \362          - VEX prefix for AVX instructions
        * \363          - VEX W1
        * \363          - VEX W1
        * \364          - VEX Vector length 256
        * \364          - VEX Vector length 256
+       * \366          - operand 2 (ymmreg) encoded in bit 4-7 of the immediate byte
        * \367          - operand 3 (ymmreg) encoded in bit 4-7 of the immediate byte
        * \367          - operand 3 (ymmreg) encoded in bit 4-7 of the immediate byte
 
 
        * \370          - VEX 0F-FLAG
        * \370          - VEX 0F-FLAG
@@ -2304,7 +2433,7 @@ implementation
         data,s,opidx : longint;
         data,s,opidx : longint;
         ea_data : ea;
         ea_data : ea;
         relsym : TObjSymbol;
         relsym : TObjSymbol;
-        needed_VEX_Extention: boolean;
+        needed_VEX_Extension: boolean;
         needed_VEX: boolean;
         needed_VEX: boolean;
         opmode: integer;
         opmode: integer;
         VEXvvvv: byte;
         VEXvvvv: byte;
@@ -2313,6 +2442,14 @@ implementation
         { safety check }
         { safety check }
         if objdata.currobjsec.size<>longword(insoffset) then
         if objdata.currobjsec.size<>longword(insoffset) then
            internalerror(200130121);
            internalerror(200130121);
+
+        { those variables are initialized inside local procedures, the dfa cannot handle this yet }
+        currsym:=nil;
+        currabsreloc:=RELOC_NONE;
+        currabsreloc32:=RELOC_NONE;
+        currrelreloc:=RELOC_NONE;
+        currval:=0;
+
         { load data to write }
         { load data to write }
         codes:=insentry^.code;
         codes:=insentry^.code;
 {$ifdef x86_64}
 {$ifdef x86_64}
@@ -2329,7 +2466,7 @@ implementation
         // needed VEX Prefix (for AVX etc.)
         // needed VEX Prefix (for AVX etc.)
 
 
         needed_VEX := false;
         needed_VEX := false;
-        needed_VEX_Extention := false;
+        needed_VEX_Extension := false;
         opmode   := -1;
         opmode   := -1;
         VEXvvvv  := 0;
         VEXvvvv  := 0;
         VEXmmmmm := 0;
         VEXmmmmm := 0;
@@ -2350,17 +2487,17 @@ implementation
             241: VEXvvvv                := VEXvvvv  OR $01; // set SIMD-prefix $66
             241: VEXvvvv                := VEXvvvv  OR $01; // set SIMD-prefix $66
             242: needed_VEX             := true;
             242: needed_VEX             := true;
             243: begin
             243: begin
-                   needed_VEX_Extention := true;
+                   needed_VEX_Extension := true;
                    VEXvvvv              := VEXvvvv  OR (1 shl 7); // set REX.W
                    VEXvvvv              := VEXvvvv  OR (1 shl 7); // set REX.W
                  end;
                  end;
             244: VEXvvvv                := VEXvvvv  OR $04; // vectorlength = 256 bits AND no scalar
             244: VEXvvvv                := VEXvvvv  OR $04; // vectorlength = 256 bits AND no scalar
             248: VEXmmmmm               := VEXmmmmm OR $01; // set leading opcode byte $0F
             248: VEXmmmmm               := VEXmmmmm OR $01; // set leading opcode byte $0F
             249: begin
             249: begin
-                   needed_VEX_Extention := true;
+                   needed_VEX_Extension := true;
                    VEXmmmmm             := VEXmmmmm OR $02; // set leading opcode byte $0F38
                    VEXmmmmm             := VEXmmmmm OR $02; // set leading opcode byte $0F38
                  end;
                  end;
             250: begin
             250: begin
-                   needed_VEX_Extention := true;
+                   needed_VEX_Extension := true;
                    VEXmmmmm             := VEXmmmmm OR $03; // set leading opcode byte $0F3A
                    VEXmmmmm             := VEXmmmmm OR $03; // set leading opcode byte $0F3A
                  end;
                  end;
 
 
@@ -2390,14 +2527,14 @@ implementation
           end
           end
           else Internalerror(777101);
           else Internalerror(777101);
 
 
-          if not(needed_VEX_Extention) then
+          if not(needed_VEX_Extension) then
           begin
           begin
             {$ifdef x86_64}
             {$ifdef x86_64}
-              if rex and $0B <> 0 then needed_VEX_Extention := true;
+              if rex and $0B <> 0 then needed_VEX_Extension := true;
             {$endif x86_64}
             {$endif x86_64}
           end;
           end;
 
 
-          if needed_VEX_Extention then
+          if needed_VEX_Extension then
           begin
           begin
             // VEX-Prefix-Length = 3 Bytes
             // VEX-Prefix-Length = 3 Bytes
             bytes[0]:=$C4;
             bytes[0]:=$C4;
@@ -2437,7 +2574,7 @@ implementation
         end
         end
         else
         else
         begin
         begin
-          needed_VEX_Extention := false;
+          needed_VEX_Extension := false;
           opmode := -1;
           opmode := -1;
         end;
         end;
 
 
@@ -2648,6 +2785,16 @@ implementation
                 else
                 else
                   objdata.writebytes(currval,4);
                   objdata.writebytes(currval,4);
               end;
               end;
+            192,193,194:
+              begin
+{$ifdef x86_64}
+                if (oper[c and 3]^.ot and OT_SIZE_MASK)=OT_BITS32 then
+                  begin
+                    bytes[0]:=$67;
+                    objdata.writebytes(bytes,1);
+                  end;
+{$endif x86_64}
+              end;
             200 :   { fixed 16-bit addr }
             200 :   { fixed 16-bit addr }
 {$ifndef x86_64}
 {$ifndef x86_64}
               begin
               begin
@@ -2724,27 +2871,48 @@ implementation
                   are not needed }
                   are not needed }
               end;
               end;
             242..244: ; // VEX flags =>> nothing to do
             242..244: ; // VEX flags =>> nothing to do
-                 247: begin
-                        if needed_VEX then
-                        begin
-                          if ops = 4 then
-                          begin
-                            if (oper[3]^.typ=top_reg) then
-                            begin
-                              if (oper[3]^.ot and otf_reg_xmm <> 0) or
-                                 (oper[3]^.ot and otf_reg_ymm <> 0) then
-                              begin
-                                bytes[0] := ((getsupreg(oper[3]^.reg) and 15) shl 4);
-                                objdata.writebytes(bytes,1);
-                              end
-                              else Internalerror(777102);
-                            end
-                            else Internalerror(777103);
-                          end
-                          else Internalerror(777104);
-                        end
-                        else Internalerror(777105);
-                      end;
+            246: begin
+                   if needed_VEX then
+                   begin
+                     if ops = 4 then
+                     begin
+                       if (oper[2]^.typ=top_reg) then
+                       begin
+                         if (oper[2]^.ot and otf_reg_xmm <> 0) or
+                            (oper[2]^.ot and otf_reg_ymm <> 0) then
+                         begin
+                           bytes[0] := ((getsupreg(oper[2]^.reg) and 15) shl 4);
+                           objdata.writebytes(bytes,1);
+                         end
+                         else Internalerror(2014032001);
+                       end
+                       else Internalerror(2014032002);
+                     end
+                     else Internalerror(2014032003);
+                   end
+                   else Internalerror(2014032004);
+                 end;
+            247: begin
+                   if needed_VEX then
+                   begin
+                     if ops = 4 then
+                     begin
+                       if (oper[3]^.typ=top_reg) then
+                       begin
+                         if (oper[3]^.ot and otf_reg_xmm <> 0) or
+                            (oper[3]^.ot and otf_reg_ymm <> 0) then
+                         begin
+                           bytes[0] := ((getsupreg(oper[3]^.reg) and 15) shl 4);
+                           objdata.writebytes(bytes,1);
+                         end
+                         else Internalerror(2014032005);
+                       end
+                       else Internalerror(2014032006);
+                     end
+                     else Internalerror(2014032007);
+                   end
+                   else Internalerror(2014032008);
+                 end;
             248..250: ; // VEX flags =>> nothing to do
             248..250: ; // VEX flags =>> nothing to do
             31,
             31,
             48,49,50 :
             48,49,50 :
@@ -2926,8 +3094,6 @@ implementation
                 end;
                 end;
               end;
               end;
           end;
           end;
-        { Special cases that can't be decoded from the InsChanges flags }
-        operation_type_table^[A_IMUL,1]:=operand_readwrite;
       end;
       end;
 
 
 
 
@@ -2950,6 +3116,41 @@ implementation
                 internalerror(200506055);
                 internalerror(200506055);
             end
             end
           end
           end
+        { IMUL has 1, 2 and 3-operand forms }
+        else if opcode=A_IMUL then
+          begin
+            case ops of
+              1:
+                if opnr=0 then
+                  result:=operand_read
+                else
+                  internalerror(2014011802);
+              2:
+                begin
+                  case opnr of
+                    0:
+                      result:=operand_read;
+                    1:
+                      result:=operand_readwrite;
+                    else
+                      internalerror(2014011803);
+                  end;
+                end;
+              3:
+                begin
+                  case opnr of
+                    0,1:
+                      result:=operand_read;
+                    2:
+                      result:=operand_write;
+                    else
+                      internalerror(2014011804);
+                  end;
+                end;
+              else
+                internalerror(2014011805);
+            end;
+          end
         else
         else
           result:=operation_type_table^[opcode,opnr];
           result:=operation_type_table^[opcode,opnr];
       end;
       end;
@@ -2959,10 +3160,14 @@ implementation
       var
       var
         tmpref: treference;
         tmpref: treference;
       begin
       begin
+        tmpref:=ref;
+{$ifdef i8086}
+        if tmpref.segment=NR_SS then
+          tmpref.segment:=NR_NO;
+{$endif i8086}
         case getregtype(r) of
         case getregtype(r) of
           R_INTREGISTER :
           R_INTREGISTER :
             begin
             begin
-              tmpref:=ref;
               if getsubreg(r)=R_SUBH then
               if getsubreg(r)=R_SUBH then
                 inc(tmpref.offset);
                 inc(tmpref.offset);
               { we don't need special code here for 32 bit loads on x86_64, since
               { we don't need special code here for 32 bit loads on x86_64, since
@@ -2973,24 +3178,24 @@ implementation
             if current_settings.fputype in fpu_avx_instructionsets then
             if current_settings.fputype in fpu_avx_instructionsets then
               case getsubreg(r) of
               case getsubreg(r) of
                 R_SUBMMD:
                 R_SUBMMD:
-                  result:=taicpu.op_ref_reg(A_VMOVSD,reg2opsize(r),ref,r);
+                  result:=taicpu.op_ref_reg(A_VMOVSD,reg2opsize(r),tmpref,r);
                 R_SUBMMS:
                 R_SUBMMS:
-                  result:=taicpu.op_ref_reg(A_VMOVSS,reg2opsize(r),ref,r);
+                  result:=taicpu.op_ref_reg(A_VMOVSS,reg2opsize(r),tmpref,r);
                 R_SUBQ,
                 R_SUBQ,
                 R_SUBMMWHOLE:
                 R_SUBMMWHOLE:
-                  result:=taicpu.op_ref_reg(A_VMOVQ,S_NO,ref,r);
+                  result:=taicpu.op_ref_reg(A_VMOVQ,S_NO,tmpref,r);
                 else
                 else
                   internalerror(200506043);
                   internalerror(200506043);
               end
               end
             else
             else
               case getsubreg(r) of
               case getsubreg(r) of
                 R_SUBMMD:
                 R_SUBMMD:
-                  result:=taicpu.op_ref_reg(A_MOVSD,reg2opsize(r),ref,r);
+                  result:=taicpu.op_ref_reg(A_MOVSD,reg2opsize(r),tmpref,r);
                 R_SUBMMS:
                 R_SUBMMS:
-                  result:=taicpu.op_ref_reg(A_MOVSS,reg2opsize(r),ref,r);
+                  result:=taicpu.op_ref_reg(A_MOVSS,reg2opsize(r),tmpref,r);
                 R_SUBQ,
                 R_SUBQ,
                 R_SUBMMWHOLE:
                 R_SUBMMWHOLE:
-                  result:=taicpu.op_ref_reg(A_MOVQ,S_NO,ref,r);
+                  result:=taicpu.op_ref_reg(A_MOVQ,S_NO,tmpref,r);
                 else
                 else
                   internalerror(200506043);
                   internalerror(200506043);
               end;
               end;
@@ -3005,10 +3210,14 @@ implementation
         size: topsize;
         size: topsize;
         tmpref: treference;
         tmpref: treference;
       begin
       begin
+        tmpref:=ref;
+{$ifdef i8086}
+        if tmpref.segment=NR_SS then
+          tmpref.segment:=NR_NO;
+{$endif i8086}
         case getregtype(r) of
         case getregtype(r) of
           R_INTREGISTER :
           R_INTREGISTER :
             begin
             begin
-              tmpref:=ref;
               if getsubreg(r)=R_SUBH then
               if getsubreg(r)=R_SUBH then
                 inc(tmpref.offset);
                 inc(tmpref.offset);
               size:=reg2opsize(r);
               size:=reg2opsize(r);
@@ -3027,24 +3236,24 @@ implementation
             if current_settings.fputype in fpu_avx_instructionsets then
             if current_settings.fputype in fpu_avx_instructionsets then
               case getsubreg(r) of
               case getsubreg(r) of
                 R_SUBMMD:
                 R_SUBMMD:
-                  result:=taicpu.op_reg_ref(A_VMOVSD,reg2opsize(r),r,ref);
+                  result:=taicpu.op_reg_ref(A_VMOVSD,reg2opsize(r),r,tmpref);
                 R_SUBMMS:
                 R_SUBMMS:
-                  result:=taicpu.op_reg_ref(A_VMOVSS,reg2opsize(r),r,ref);
+                  result:=taicpu.op_reg_ref(A_VMOVSS,reg2opsize(r),r,tmpref);
                 R_SUBQ,
                 R_SUBQ,
                 R_SUBMMWHOLE:
                 R_SUBMMWHOLE:
-                  result:=taicpu.op_reg_ref(A_VMOVQ,S_NO,r,ref);
+                  result:=taicpu.op_reg_ref(A_VMOVQ,S_NO,r,tmpref);
                 else
                 else
                   internalerror(200506042);
                   internalerror(200506042);
               end
               end
             else
             else
               case getsubreg(r) of
               case getsubreg(r) of
                 R_SUBMMD:
                 R_SUBMMD:
-                  result:=taicpu.op_reg_ref(A_MOVSD,reg2opsize(r),r,ref);
+                  result:=taicpu.op_reg_ref(A_MOVSD,reg2opsize(r),r,tmpref);
                 R_SUBMMS:
                 R_SUBMMS:
-                  result:=taicpu.op_reg_ref(A_MOVSS,reg2opsize(r),r,ref);
+                  result:=taicpu.op_reg_ref(A_MOVSS,reg2opsize(r),r,tmpref);
                 R_SUBQ,
                 R_SUBQ,
                 R_SUBMMWHOLE:
                 R_SUBMMWHOLE:
-                  result:=taicpu.op_reg_ref(A_MOVQ,S_NO,r,ref);
+                  result:=taicpu.op_reg_ref(A_MOVQ,S_NO,r,tmpref);
                 else
                 else
                   internalerror(200506042);
                   internalerror(200506042);
               end;
               end;
@@ -3090,6 +3299,10 @@ implementation
       actRegTypes  : int64;
       actRegTypes  : int64;
       actRegMemTypes: int64;
       actRegMemTypes: int64;
       NewRegSize: int64;
       NewRegSize: int64;
+
+      actVMemCount  : integer;
+      actVMemTypes  : int64;
+
       RegMMXSizeMask: int64;
       RegMMXSizeMask: int64;
       RegXMMSizeMask: int64;
       RegXMMSizeMask: int64;
       RegYMMSizeMask: int64;
       RegYMMSizeMask: int64;
@@ -3145,19 +3358,30 @@ implementation
             actMemCount      := 0;
             actMemCount      := 0;
             actRegMemTypes   := 0;
             actRegMemTypes   := 0;
 
 
+            actVMemCount     := 0;
+            actVMemTypes     := 0;
+
             actConstSize     := 0;
             actConstSize     := 0;
             actConstCount    := 0;
             actConstCount    := 0;
 
 
-            if asmop = a_vpmovzxbq then
-            begin
-              RegXMMSizeMask := RegXMMSizeMask;
-            end;
-
-
-
             for j := 0 to insentry^.ops -1 do
             for j := 0 to insentry^.ops -1 do
             begin
             begin
-              if (insentry^.optypes[j] and OT_REGISTER) = OT_REGISTER then
+              if ((insentry^.optypes[j] and OT_XMEM32) = OT_XMEM32) OR
+                 ((insentry^.optypes[j] and OT_XMEM64) = OT_XMEM64) OR
+                 ((insentry^.optypes[j] and OT_YMEM32) = OT_YMEM32) OR
+                 ((insentry^.optypes[j] and OT_YMEM64) = OT_YMEM64) then
+              begin
+                inc(actVMemCount);
+
+                case insentry^.optypes[j] and (OT_XMEM32 OR OT_XMEM64 OR OT_YMEM32 OR OT_YMEM64) of
+                  OT_XMEM32: actVMemTypes := actVMemTypes or OT_XMEM32;
+                  OT_XMEM64: actVMemTypes := actVMemTypes or OT_XMEM64;
+                  OT_YMEM32: actVMemTypes := actVMemTypes or OT_YMEM32;
+                  OT_YMEM64: actVMemTypes := actVMemTypes or OT_YMEM64;
+                        else InternalError(777206);
+                end;
+              end
+              else if (insentry^.optypes[j] and OT_REGISTER) = OT_REGISTER then
               begin
               begin
                 inc(actRegCount);
                 inc(actRegCount);
 
 
@@ -3222,7 +3446,57 @@ implementation
               end;
               end;
             end;
             end;
 
 
+            if actVMemCount > 0 then
+            begin
+              if actVMemCount = 1 then
+              begin
+                if actVMemTypes > 0 then
+                begin
+                  case actVMemTypes of
+                    OT_XMEM32: MRefInfo := msiXMem32;
+                    OT_XMEM64: MRefInfo := msiXMem64;
+                    OT_YMEM32: MRefInfo := msiYMem32;
+                    OT_YMEM64: MRefInfo := msiYMem64;
+                          else InternalError(777208);
+                  end;
 
 
+                  case actRegTypes of
+                    OT_XMMREG: case MRefInfo of
+                                 msiXMem32,
+                                 msiXMem64: RegXMMSizeMask := RegXMMSizeMask or OT_BITS128;
+                                 msiYMem32,
+                                 msiYMem64: RegXMMSizeMask := RegXMMSizeMask or OT_BITS256;
+                                       else InternalError(777210);
+                               end;
+                    OT_YMMREG: case MRefInfo of
+                                 msiXMem32,
+                                 msiXMem64: RegYMMSizeMask := RegYMMSizeMask or OT_BITS128;
+                                 msiYMem32,
+                                 msiYMem64: RegYMMSizeMask := RegYMMSizeMask or OT_BITS256;
+                                       else InternalError(777211);
+                               end;
+                          //else InternalError(777209);
+                  end;
+
+
+                  if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize = msiUnkown then
+                  begin
+                    InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := MRefInfo;
+                  end
+                  else if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize <> MRefInfo then
+                  begin
+                    if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize in [msiXMem32, msiXMem64, msiYMem32, msiYMem64] then
+                    begin
+                      InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiVMemMultiple;
+                    end
+                    else InternalError(777212);
+                  end;
+
+                end;
+              end
+              else InternalError(777207);
+            end
+            else
             case actMemCount of
             case actMemCount of
                 0: ; // nothing todo
                 0: ; // nothing todo
                 1: begin
                 1: begin
@@ -3309,7 +3583,14 @@ implementation
                             OT_BITS256: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx64y256;
                             OT_BITS256: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx64y256;
                           end;
                           end;
               OT_BITS128: begin
               OT_BITS128: begin
-                            if RegMMXSizeMask = 0 then
+                            if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize = msiVMemMultiple then
+                            begin
+                              // vector-memory-operand AVX2 (e.g. VGATHER..)
+                              case RegYMMSizeMask of
+                                OT_BITS256: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiVMemRegSize;
+                              end;
+                            end
+                            else if RegMMXSizeMask = 0 then
                             begin
                             begin
                               case RegYMMSizeMask of
                               case RegYMMSizeMask of
                                 OT_BITS128: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx64y128;
                                 OT_BITS128: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx64y128;

+ 92 - 13
compiler/x86/agx86att.pas

@@ -29,12 +29,13 @@ interface
 
 
     uses
     uses
       cclasses,cpubase,
       cclasses,cpubase,
-      globals,cgutils,
+      globals,globtype,cgutils,
       aasmbase,aasmtai,aasmdata,assemble,aggas;
       aasmbase,aasmtai,aasmdata,assemble,aggas;
 
 
     type
     type
       Tx86ATTAssembler=class(TGNUassembler)
       Tx86ATTAssembler=class(TGNUassembler)
         constructor create(smart: boolean); override;
         constructor create(smart: boolean); override;
+        function MakeCmdLine: TCmdStr; override;
       end;
       end;
 
 
       Tx86AppleGNUAssembler=class(TAppleGNUassembler)
       Tx86AppleGNUAssembler=class(TAppleGNUassembler)
@@ -64,7 +65,6 @@ interface
   implementation
   implementation
 
 
     uses
     uses
-      globtype,
       cutils,systems,
       cutils,systems,
       verbose,
       verbose,
       itcpugas,
       itcpugas,
@@ -82,6 +82,44 @@ interface
         InstrWriter := Tx86InstrWriter.create(self);
         InstrWriter := Tx86InstrWriter.create(self);
       end;
       end;
 
 
+    function TX86ATTAssembler.MakeCmdLine: TCmdStr; { Returns the inherited command line with the $FORMAT placeholder filled in for the current target. }
+      var
+        FormatName : string; { object-format name that is passed to Replace for the $FORMAT placeholder }
+      begin
+        result:=Inherited MakeCmdLine; { start from the command line produced by the parent class }
+{$ifdef i386}
+        case target_info.system of { i386 builds: pick the format name from the target system }
+          system_i386_go32v2:
+            FormatName:='coff';
+          system_i386_wdosx,
+          system_i386_win32:
+            FormatName:='win32';
+          system_i386_embedded:
+            FormatName:='obj';
+          system_i386_linux,
+          system_i386_beos:
+            FormatName:='elf';
+          system_i386_darwin:
+            FormatName:='macho32';
+        else
+          FormatName:='elf'; { fallback for every i386 target not listed above }
+        end;
+{$endif i386}
+{$ifdef x86_64}
+        case target_info.system of { x86_64 builds: pick the format name from the target system }
+          system_x86_64_win64:
+            FormatName:='win64';
+          system_x86_64_darwin:
+            FormatName:='macho64';
+          system_x86_64_linux:
+            FormatName:='elf64';
+        else
+          FormatName:='elf64'; { fallback for every x86_64 target not listed above }
+        end;
+{$endif x86_64}
+        Replace(result,'$FORMAT',FormatName); { presumably substitutes the placeholder in place; NOTE(review): FormatName is only assigned inside the i386/x86_64 blocks above - confirm no other CPU define compiles this unit }
+      end;
+
 {****************************************************************************
 {****************************************************************************
                           Tx86AppleGNUAssembler
                           Tx86AppleGNUAssembler
  ****************************************************************************}
  ****************************************************************************}
@@ -151,11 +189,20 @@ interface
              owner.AsmWrite('0');
              owner.AsmWrite('0');
            if (index<>NR_NO) and (base=NR_NO) then
            if (index<>NR_NO) and (base=NR_NO) then
             begin
             begin
-              owner.AsmWrite('(,'+gas_regname(index));
-              if scalefactor<>0 then
-               owner.AsmWrite(','+tostr(scalefactor)+')')
+              if scalefactor in [0,1] then
+                { Switching index to base position gives shorter
+                  assembler instructions }
+                begin
+                  owner.AsmWrite('('+gas_regname(index)+')');
+                end
               else
               else
-               owner.AsmWrite(')');
+                begin
+                  owner.AsmWrite('(,'+gas_regname(index));
+                  if scalefactor<>0 then
+                   owner.AsmWrite(','+tostr(scalefactor)+')')
+                  else
+                   owner.AsmWrite(')');
+                end;
             end
             end
            else
            else
             if (index=NR_NO) and (base<>NR_NO) then
             if (index=NR_NO) and (base<>NR_NO) then
@@ -407,6 +454,7 @@ interface
           end;
           end;
       end;
       end;
 
 
+
 {*****************************************************************************
 {*****************************************************************************
                                   Initialize
                                   Initialize
 *****************************************************************************}
 *****************************************************************************}
@@ -418,7 +466,7 @@ interface
             id     : as_gas;
             id     : as_gas;
             idtxt  : 'AS';
             idtxt  : 'AS';
             asmbin : 'as';
             asmbin : 'as';
-            asmcmd : '--64 -o $OBJ $ASM';
+            asmcmd : '--64 -o $OBJ $EXTRAOPT $ASM';
             supported_targets : [system_x86_64_linux,system_x86_64_freebsd,
             supported_targets : [system_x86_64_linux,system_x86_64_freebsd,
                                  system_x86_64_win64,system_x86_64_embedded,
                                  system_x86_64_win64,system_x86_64_embedded,
                                  system_x86_64_openbsd,system_x86_64_netbsd];
                                  system_x86_64_openbsd,system_x86_64_netbsd];
@@ -428,12 +476,25 @@ interface
             dollarsign: '$';
             dollarsign: '$';
           );
           );
 
 
+       as_x86_64_yasm_info : tasminfo = { assembler description for running yasm on x86_64 targets }
+          (
+            id     : as_yasm;
+            idtxt  : 'YASM';
+            asmbin : 'yasm'; { name of the external assembler executable }
+            asmcmd : '-a x86 -p gas -f $FORMAT -o $OBJ $EXTRAOPT $ASM'; { yasm options: -a architecture, -p gas parser, -f object format; $FORMAT is replaced in TX86ATTAssembler.MakeCmdLine }
+            supported_targets : [system_x86_64_linux,system_x86_64_freebsd,system_x86_64_win64,system_x86_64_embedded];
+            flags : [af_needar,af_smartlink_sections,af_supports_dwarf];
+            labelprefix : '.L';
+            comment : '# ';
+            dollarsign: '$';
+          );
+
        as_x86_64_gas_info : tasminfo =
        as_x86_64_gas_info : tasminfo =
           (
           (
             id     : as_ggas;
             id     : as_ggas;
             idtxt  : 'GAS';
             idtxt  : 'GAS';
             asmbin : 'gas';
             asmbin : 'gas';
-            asmcmd : '--64 -o $OBJ $ASM';
+            asmcmd : '--64 -o $OBJ $EXTRAOPT $ASM';
             supported_targets : [system_x86_64_solaris];
             supported_targets : [system_x86_64_solaris];
             flags : [af_needar,af_smartlink_sections,af_supports_dwarf];
             flags : [af_needar,af_smartlink_sections,af_supports_dwarf];
             labelprefix : '.L';
             labelprefix : '.L';
@@ -448,7 +509,7 @@ interface
             id     : as_darwin;
             id     : as_darwin;
             idtxt  : 'AS-Darwin';
             idtxt  : 'AS-Darwin';
             asmbin : 'as';
             asmbin : 'as';
-            asmcmd : '-o $OBJ $ASM -arch x86_64';
+            asmcmd : '-o $OBJ $EXTRAOPT $ASM -arch x86_64';
             supported_targets : [system_x86_64_darwin];
             supported_targets : [system_x86_64_darwin];
             flags : [af_needar,af_smartlink_sections,af_supports_dwarf];
             flags : [af_needar,af_smartlink_sections,af_supports_dwarf];
             labelprefix : 'L';
             labelprefix : 'L';
@@ -462,7 +523,7 @@ interface
             id     : as_gas;
             id     : as_gas;
             idtxt  : 'AS';
             idtxt  : 'AS';
             asmbin : 'as';
             asmbin : 'as';
-            asmcmd : '--32 -o $OBJ $ASM';
+            asmcmd : '--32 -o $OBJ $EXTRAOPT $ASM';
             supported_targets : [system_i386_GO32V2,system_i386_linux,system_i386_Win32,system_i386_freebsd,system_i386_solaris,system_i386_beos,
             supported_targets : [system_i386_GO32V2,system_i386_linux,system_i386_Win32,system_i386_freebsd,system_i386_solaris,system_i386_beos,
                                 system_i386_netbsd,system_i386_Netware,system_i386_qnx,system_i386_wdosx,system_i386_openbsd,
                                 system_i386_netbsd,system_i386_Netware,system_i386_qnx,system_i386_wdosx,system_i386_openbsd,
                                 system_i386_netwlibc,system_i386_wince,system_i386_embedded,system_i386_symbian,system_i386_haiku,system_x86_6432_linux,
                                 system_i386_netwlibc,system_i386_wince,system_i386_embedded,system_i386_symbian,system_i386_haiku,system_x86_6432_linux,
@@ -473,13 +534,29 @@ interface
             dollarsign: '$';
             dollarsign: '$';
           );
           );
 
 
+       as_i386_yasm_info : tasminfo = { assembler description for running yasm on i386 targets }
+          (
+            id     : as_yasm;
+            idtxt  : 'YASM';
+            asmbin : 'yasm'; { name of the external assembler executable }
+            asmcmd : '-a x86 -p gas -f $FORMAT -o $OBJ $EXTRAOPT $ASM'; { yasm options: -a architecture, -p gas parser, -f object format; $FORMAT is replaced in TX86ATTAssembler.MakeCmdLine }
+            supported_targets : [system_i386_GO32V2,system_i386_linux,system_i386_Win32,system_i386_freebsd,system_i386_solaris,system_i386_beos,
+                                system_i386_netbsd,system_i386_Netware,system_i386_qnx,system_i386_wdosx,system_i386_openbsd,
+                                system_i386_netwlibc,system_i386_wince,system_i386_embedded,system_i386_symbian,system_i386_haiku,system_x86_6432_linux,
+                                system_i386_nativent]; { NOTE(review): many targets listed here have no explicit case in MakeCmdLine and therefore get the 'elf' fallback - confirm this is intended }
+            flags : [af_needar,af_smartlink_sections,af_supports_dwarf];
+            labelprefix : '.L';
+            comment : '# ';
+            dollarsign: '$';
+          );
+
 
 
        as_i386_as_aout_info : tasminfo =
        as_i386_as_aout_info : tasminfo =
           (
           (
             id           : as_i386_as_aout;
             id           : as_i386_as_aout;
             idtxt  : 'AS_AOUT';
             idtxt  : 'AS_AOUT';
             asmbin : 'as';
             asmbin : 'as';
-            asmcmd : '-o $OBJ $ASM';
+            asmcmd : '-o $OBJ $EXTRAOPT $ASM';
             supported_targets : [system_i386_linux,system_i386_OS2,system_i386_freebsd,system_i386_netbsd,system_i386_openbsd,system_i386_EMX,system_i386_embedded];
             supported_targets : [system_i386_linux,system_i386_OS2,system_i386_freebsd,system_i386_netbsd,system_i386_openbsd,system_i386_EMX,system_i386_embedded];
             flags : [af_needar,af_stabs_use_function_absolute_addresses];
             flags : [af_needar,af_stabs_use_function_absolute_addresses];
             labelprefix : 'L';
             labelprefix : 'L';
@@ -493,7 +570,7 @@ interface
             id     : as_darwin;
             id     : as_darwin;
             idtxt  : 'AS-Darwin';
             idtxt  : 'AS-Darwin';
             asmbin : 'as';
             asmbin : 'as';
-            asmcmd : '-o $OBJ $ASM -arch i386';
+            asmcmd : '-o $OBJ $EXTRAOPT $ASM -arch i386';
             supported_targets : [system_i386_darwin,system_i386_iphonesim];
             supported_targets : [system_i386_darwin,system_i386_iphonesim];
             flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_stabs_use_function_absolute_addresses];
             flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_stabs_use_function_absolute_addresses];
             labelprefix : 'L';
             labelprefix : 'L';
@@ -506,7 +583,7 @@ interface
             id     : as_ggas;
             id     : as_ggas;
             idtxt  : 'GAS';
             idtxt  : 'GAS';
             asmbin : 'gas';
             asmbin : 'gas';
-            asmcmd : '--32 -o $OBJ $ASM';
+            asmcmd : '--32 -o $OBJ $EXTRAOPT $ASM';
             supported_targets : [system_i386_GO32V2,system_i386_linux,system_i386_Win32,system_i386_freebsd,system_i386_solaris,system_i386_beos,
             supported_targets : [system_i386_GO32V2,system_i386_linux,system_i386_Win32,system_i386_freebsd,system_i386_solaris,system_i386_beos,
                                 system_i386_netbsd,system_i386_Netware,system_i386_qnx,system_i386_wdosx,system_i386_openbsd,
                                 system_i386_netbsd,system_i386_Netware,system_i386_qnx,system_i386_wdosx,system_i386_openbsd,
                                 system_i386_netwlibc,system_i386_wince,system_i386_embedded,system_i386_symbian,system_i386_haiku,
                                 system_i386_netwlibc,system_i386_wince,system_i386_embedded,system_i386_symbian,system_i386_haiku,
@@ -521,11 +598,13 @@ interface
 initialization
 initialization
 {$ifdef x86_64}
 {$ifdef x86_64}
   RegisterAssembler(as_x86_64_as_info,Tx86ATTAssembler);
   RegisterAssembler(as_x86_64_as_info,Tx86ATTAssembler);
+  RegisterAssembler(as_x86_64_yasm_info,Tx86ATTAssembler);
   RegisterAssembler(as_x86_64_gas_info,Tx86ATTAssembler);
   RegisterAssembler(as_x86_64_gas_info,Tx86ATTAssembler);
   RegisterAssembler(as_x86_64_gas_darwin_info,Tx86AppleGNUAssembler);
   RegisterAssembler(as_x86_64_gas_darwin_info,Tx86AppleGNUAssembler);
 {$else x86_64}
 {$else x86_64}
   RegisterAssembler(as_i386_as_info,Tx86ATTAssembler);
   RegisterAssembler(as_i386_as_info,Tx86ATTAssembler);
   RegisterAssembler(as_i386_gas_info,Tx86ATTAssembler);
   RegisterAssembler(as_i386_gas_info,Tx86ATTAssembler);
+  RegisterAssembler(as_i386_yasm_info,Tx86ATTAssembler);
   RegisterAssembler(as_i386_gas_darwin_info,Tx86AppleGNUAssembler);
   RegisterAssembler(as_i386_gas_darwin_info,Tx86AppleGNUAssembler);
   RegisterAssembler(as_i386_as_aout_info,Tx86AoutGNUAssembler);
   RegisterAssembler(as_i386_as_aout_info,Tx86AoutGNUAssembler);
 {$endif x86_64}
 {$endif x86_64}

+ 16 - 6
compiler/x86/agx86int.pas

@@ -106,6 +106,8 @@ implementation
         '',
         '',
         '',
         '',
         '',
         '',
+        '',
+        '',
         ''
         ''
       );
       );
 
 
@@ -156,6 +158,8 @@ implementation
         '',
         '',
         '',
         '',
         '',
         '',
+        '',
+        '',
         ''
         ''
       );
       );
 
 
@@ -309,8 +313,11 @@ implementation
               if o.ref^.refaddr in [addr_no,addr_pic,addr_pic_no_got] then
               if o.ref^.refaddr in [addr_no,addr_pic,addr_pic_no_got] then
                 begin
                 begin
                   if ((opcode <> A_LGS) and (opcode <> A_LSS) and
                   if ((opcode <> A_LGS) and (opcode <> A_LSS) and
-                      (opcode <> A_LFS) and (opcode <> A_LDS) and
-                      (opcode <> A_LES)) then
+                      (opcode <> A_LFS)
+{$ifndef x86_64}
+                      and (opcode <> A_LDS) and (opcode <> A_LES)
+{$endif x86_64}
+                      ) then
                    Begin
                    Begin
                      case s of
                      case s of
                       S_B : AsmWrite('byte ptr ');
                       S_B : AsmWrite('byte ptr ');
@@ -576,6 +583,9 @@ implementation
                        hp:=tai(hp.next);
                        hp:=tai(hp.next);
                        AsmWrite(',');
                        AsmWrite(',');
                      until false;
                      until false;
+                     { Subtract section start for secrel32 type }
+                     if consttype=aitconst_secrel32_symbol then
+                       AsmWrite(' - $$');
                      AsmLn;
                      AsmLn;
                    end;
                    end;
                  else
                  else
@@ -962,7 +972,7 @@ implementation
             id           : as_i386_tasm;
             id           : as_i386_tasm;
             idtxt  : 'TASM';
             idtxt  : 'TASM';
             asmbin : 'tasm';
             asmbin : 'tasm';
-            asmcmd : '/m2 /ml $ASM $OBJ';
+            asmcmd : '/m2 /ml $EXTRAOPT $ASM $OBJ';
             supported_targets : [system_i386_GO32V2,system_i386_Win32,system_i386_wdosx,system_i386_watcom,system_i386_wince];
             supported_targets : [system_i386_GO32V2,system_i386_Win32,system_i386_wdosx,system_i386_watcom,system_i386_wince];
             flags : [af_needar,af_labelprefix_only_inside_procedure];
             flags : [af_needar,af_labelprefix_only_inside_procedure];
             labelprefix : '@@';
             labelprefix : '@@';
@@ -975,7 +985,7 @@ implementation
             id           : as_i386_masm;
             id           : as_i386_masm;
             idtxt  : 'MASM';
             idtxt  : 'MASM';
             asmbin : 'masm';
             asmbin : 'masm';
-            asmcmd : '/c /Cp $ASM /Fo$OBJ';
+            asmcmd : '/c /Cp $EXTRAOPT $ASM /Fo$OBJ';
             supported_targets : [system_i386_GO32V2,system_i386_Win32,system_i386_wdosx,system_i386_watcom,system_i386_wince];
             supported_targets : [system_i386_GO32V2,system_i386_Win32,system_i386_wdosx,system_i386_watcom,system_i386_wince];
             flags : [af_needar];
             flags : [af_needar];
             labelprefix : '@@';
             labelprefix : '@@';
@@ -988,7 +998,7 @@ implementation
             id     : as_i386_wasm;
             id     : as_i386_wasm;
             idtxt  : 'WASM';
             idtxt  : 'WASM';
             asmbin : 'wasm';
             asmbin : 'wasm';
-            asmcmd : '$ASM -6s -fp6 -ms -zq -Fo=$OBJ';
+            asmcmd : '$ASM $EXTRAOPT -6s -fp6 -ms -zq -Fo=$OBJ';
             supported_targets : [system_i386_watcom];
             supported_targets : [system_i386_watcom];
             flags : [af_needar];
             flags : [af_needar];
             labelprefix : '@@';
             labelprefix : '@@';
@@ -1002,7 +1012,7 @@ implementation
             id     : as_x86_64_masm;
             id     : as_x86_64_masm;
             idtxt  : 'MASM';
             idtxt  : 'MASM';
             asmbin : 'ml64';
             asmbin : 'ml64';
-            asmcmd : '/c /Cp $ASM /Fo$OBJ';
+            asmcmd : '/c /Cp $EXTRAOPT $ASM /Fo$OBJ';
             supported_targets : [system_x86_64_win64];
             supported_targets : [system_x86_64_win64];
             flags : [af_needar];
             flags : [af_needar];
             labelprefix : '@@';
             labelprefix : '@@';

+ 388 - 101
compiler/x86/agx86nsm.pas

@@ -27,20 +27,20 @@ unit agx86nsm;
 interface
 interface
 
 
     uses
     uses
-      cpubase,
+      cpubase,globtype,
       aasmbase,aasmtai,aasmdata,aasmcpu,assemble,cgutils;
       aasmbase,aasmtai,aasmdata,aasmcpu,assemble,cgutils;
 
 
     type
     type
 
 
       { T386NasmAssembler }
       { T386NasmAssembler }
 
 
-      T386NasmAssembler = class(texternalassembler)
+      TX86NasmAssembler = class(texternalassembler)
       private
       private
-        function CodeSectionName: string;
-
+        using_relative : boolean;
+        function CodeSectionName(const aname:string): string;
         procedure WriteReference(var ref : treference);
         procedure WriteReference(var ref : treference);
         procedure WriteOper(const o:toper;s : topsize; opcode: tasmop;ops:longint;dest : boolean);
         procedure WriteOper(const o:toper;s : topsize; opcode: tasmop;ops:longint;dest : boolean);
-        procedure WriteOper_jmp(const o:toper; op : tasmop);
+        procedure WriteOper_jmp(const o:toper; ai : taicpu);
         procedure WriteSection(atype:TAsmSectiontype;const aname:string);
         procedure WriteSection(atype:TAsmSectiontype;const aname:string);
       public
       public
         procedure WriteTree(p:TAsmList);override;
         procedure WriteTree(p:TAsmList);override;
@@ -48,6 +48,7 @@ interface
         procedure WriteExternals;
         procedure WriteExternals;
         procedure WriteSmartExternals;
         procedure WriteSmartExternals;
         procedure WriteHeader;
         procedure WriteHeader;
+        function  MakeCmdLine: TCmdStr;override;
       end;
       end;
 
 
 
 
@@ -55,7 +56,7 @@ interface
   implementation
   implementation
 
 
     uses
     uses
-      cutils,globtype,globals,systems,cclasses,
+      cutils,globals,systems,cclasses,
       fmodule,finput,verbose,cpuinfo,cgbase
       fmodule,finput,verbose,cpuinfo,cgbase
       ;
       ;
 
 
@@ -71,7 +72,7 @@ interface
       nasm_regname_table : array[tregisterindex] of string[7] = (
       nasm_regname_table : array[tregisterindex] of string[7] = (
         {r386nasm.inc contains the Nasm name of each register.}
         {r386nasm.inc contains the Nasm name of each register.}
 {$if defined(x86_64)}
 {$if defined(x86_64)}
-        {$fatal nasm support not yet implemented for x86_64 }
+        {$i r8664nasm.inc}
 {$elseif defined(i386)}
 {$elseif defined(i386)}
         {$i r386nasm.inc}
         {$i r386nasm.inc}
 {$elseif defined(i8086)}
 {$elseif defined(i8086)}
@@ -215,6 +216,23 @@ interface
                sizestr:='dword '
                sizestr:='dword '
              else
              else
                sizestr:='word ';
                sizestr:='word ';
+{$ifdef x86_64}
+           S_BQ : if dest then
+                   sizestr:='qword '
+                  else
+                   sizestr:='byte ';
+           S_WQ : if dest then
+                   sizestr:='qword '
+                  else
+                   sizestr:='word ';
+           S_LQ : if dest then
+                   sizestr:='qword '
+                  else
+                   sizestr:='dword ';
+           { Nothing needed for XMM registers }
+           S_XMM: sizestr:='';
+
+{$endif x86_64}
           else { S_NO }
           else { S_NO }
             sizestr:='';
             sizestr:='';
         end;
         end;
@@ -290,31 +308,47 @@ interface
     end;
     end;
 
 
 {****************************************************************************
 {****************************************************************************
-                               T386NasmAssembler
+                               TX86NasmAssembler
  ****************************************************************************}
  ****************************************************************************}
 
 
 
 
-    function T386NasmAssembler.CodeSectionName: string;
+    function TX86NasmAssembler.CodeSectionName(const aname:string): string;
       begin
       begin
 {$ifdef i8086}
 {$ifdef i8086}
         if current_settings.x86memorymodel in x86_far_code_models then
         if current_settings.x86memorymodel in x86_far_code_models then
-          result:=current_module.modulename^ + '_TEXT'
+          begin
+            if cs_huge_code in current_settings.moduleswitches then
+              result:=aname + '_TEXT use16 class=code'
+            else
+              result:=current_module.modulename^ + '_TEXT';
+          end
         else
         else
 {$endif}
 {$endif}
           result:='.text';
           result:='.text';
       end;
       end;
 
 
 
 
-    procedure T386NasmAssembler.WriteReference(var ref : treference);
+    procedure TX86NasmAssembler.WriteReference(var ref : treference);
       var
       var
         first : boolean;
         first : boolean;
+        base_done : boolean;
       begin
       begin
         with ref do
         with ref do
          begin
          begin
            AsmWrite('[');
            AsmWrite('[');
            first:=true;
            first:=true;
+           base_done:=false;
            if (segment<>NR_NO) then
            if (segment<>NR_NO) then
              AsmWrite(nasm_regname(segment)+':');
              AsmWrite(nasm_regname(segment)+':');
+{$ifdef x86_64}
+          if (base=NR_RIP) then
+            begin
+              { nasm RIP is implicit for pic }
+              if not (ref.refaddr in [addr_pic,addr_pic_no_got]) and not using_relative then
+                AsmWrite('$ + ');
+              base_done:=true;
+            end;
+{$endif x86_64}
            if assigned(symbol) then
            if assigned(symbol) then
             begin
             begin
               AsmWrite(symbol.name);
               AsmWrite(symbol.name);
@@ -322,7 +356,7 @@ interface
                 AddSymbol(symbol.name,false);
                 AddSymbol(symbol.name,false);
               first:=false;
               first:=false;
             end;
             end;
-           if (base<>NR_NO) then
+           if (base<>NR_NO) and not base_done then
             begin
             begin
               if not(first) then
               if not(first) then
                AsmWrite('+')
                AsmWrite('+')
@@ -357,7 +391,7 @@ interface
        end;
        end;
 
 
 
 
-    procedure T386NasmAssembler.WriteOper(const o:toper;s : topsize; opcode: tasmop;ops:longint;dest : boolean);
+    procedure TX86NasmAssembler.WriteOper(const o:toper;s : topsize; opcode: tasmop;ops:longint;dest : boolean);
       begin
       begin
         case o.typ of
         case o.typ of
           top_reg :
           top_reg :
@@ -370,11 +404,13 @@ interface
             end;
             end;
           top_ref :
           top_ref :
             begin
             begin
-              if o.ref^.refaddr=addr_no then
+              if o.ref^.refaddr in [addr_no,addr_pic,addr_pic_no_got] then
                 begin
                 begin
                   if not ((opcode = A_LEA) or (opcode = A_LGS) or
                   if not ((opcode = A_LEA) or (opcode = A_LGS) or
                           (opcode = A_LSS) or (opcode = A_LFS) or
                           (opcode = A_LSS) or (opcode = A_LFS) or
+{$ifndef x86_64}
                           (opcode = A_LES) or (opcode = A_LDS) or
                           (opcode = A_LES) or (opcode = A_LDS) or
+{$endif x86_64}
                          // (opcode = A_SHR) or (opcode = A_SHL) or
                          // (opcode = A_SHR) or (opcode = A_SHL) or
                          // (opcode = A_SAR) or (opcode = A_SAL) or
                          // (opcode = A_SAR) or (opcode = A_SAL) or
                           (opcode = A_OUT) or (opcode = A_IN)) then
                           (opcode = A_OUT) or (opcode = A_IN)) then
@@ -390,15 +426,16 @@ interface
               else
               else
                 begin
                 begin
 {$ifdef x86_64}
 {$ifdef x86_64}
-                  asmwrite('qword ');
+                  if s=S_L then
+                    asmwrite('dword ')
+                  else
+                    asmwrite('qword ');
 {$endif}
 {$endif}
 {$ifdef i386}
 {$ifdef i386}
                   asmwrite('dword ');
                   asmwrite('dword ');
 {$endif i386}
 {$endif i386}
 {$ifdef i8086}
 {$ifdef i8086}
-                  if o.ref^.refaddr=addr_far then
-                    asmwrite('far ')
-                  else if o.ref^.refaddr=addr_seg then
+                  if o.ref^.refaddr=addr_seg then
                     asmwrite('SEG ')
                     asmwrite('SEG ')
                   else
                   else
                     asmwrite('word ');
                     asmwrite('word ');
@@ -422,39 +459,26 @@ interface
       end;
       end;
 
 
 
 
-    procedure T386NasmAssembler.WriteOper_jmp(const o:toper; op : tasmop);
+    procedure TX86NasmAssembler.WriteOper_jmp(const o:toper; ai : taicpu);
       begin
       begin
         case o.typ of
         case o.typ of
           top_reg :
           top_reg :
             AsmWrite(nasm_regname(o.reg));
             AsmWrite(nasm_regname(o.reg));
           top_ref :
           top_ref :
-            if o.ref^.refaddr in [addr_no{$ifdef i8086},addr_far_ref{$endif}] then
+            if o.ref^.refaddr=addr_no then
               begin
               begin
-{$ifdef i8086}
-                if o.ref^.refaddr=addr_far_ref then
+                if ai.opsize=S_FAR then
                   AsmWrite('far ');
                   AsmWrite('far ');
-{$endif i8086}
                 WriteReference(o.ref^);
                 WriteReference(o.ref^);
               end
               end
             else
             else
               begin
               begin
-{ NEAR forces NASM to emit near jumps, which are 386+ }
-{$ifndef i8086}
-                if not(
-                       (op=A_JCXZ) or (op=A_JECXZ) or
-    {$ifdef x86_64}
-                       (op=A_JRCXZ) or
-    {$endif x86_64}
-                       (op=A_LOOP) or (op=A_LOOPE) or
-                       (op=A_LOOPNE) or (op=A_LOOPNZ) or
-                       (op=A_LOOPZ)
-                      ) then
-                  AsmWrite('NEAR ');
-{$endif i8086}
-{$ifdef i8086}
-                if o.ref^.refaddr=addr_far then
+                if ai.opsize=S_FAR then
                   AsmWrite('far ');
                   AsmWrite('far ');
-{$endif i8086}
+                { else
+                   AsmWrite('near ') just disables short branches, increasing code size. 
+                   Omitting it does not cause any bad effects, tested with nasm 2.11. }
+
                 AsmWrite(o.ref^.symbol.name);
                 AsmWrite(o.ref^.symbol.name);
                 if SmartAsm then
                 if SmartAsm then
                   AddSymbol(o.ref^.symbol.name,false);
                   AddSymbol(o.ref^.symbol.name,false);
@@ -473,13 +497,15 @@ interface
 
 
 
 
     const
     const
-      ait_const2str : array[aitconst_128bit..aitconst_secrel32_symbol] of string[20]=(
-        #9'FIXME_128BIT'#9,#9'FIXME_64BIT'#9,#9'DD'#9,#9'DW'#9,#9'DB'#9,
+      ait_const2str : array[aitconst_128bit..aitconst_64bit_unaligned] of string[30]=(
+        #9'FIXME_128BIT'#9,#9'DQ'#9,#9'DD'#9,#9'DW'#9,#9'DB'#9,
         #9'FIXME_SLEB128BIT'#9,#9'FIXME_ULEB128BIT'#9,
         #9'FIXME_SLEB128BIT'#9,#9'FIXME_ULEB128BIT'#9,
-        #9'RVA'#9,#9'SECREL32'#9
+        #9'RVA'#9,#9'SECREL32'#9,#9'FIXME_darwin_dwarf_delta64'#9,
+        #9'FIXME_darwin_dwarf_delta32'#9,#9'FIXME_half16bit'#9,
+        #9'DW'#9,#9'DD'#9,#9'FIXME_64BIT_UNALIGNED'#9
       );
       );
 
 
-    procedure T386NasmAssembler.WriteSection(atype:TAsmSectiontype;const aname:string);
+    procedure TX86NasmAssembler.WriteSection(atype:TAsmSectiontype;const aname:string);
       const
       const
         secnames : array[TAsmSectiontype] of string[length('__DATA, __datacoal_nt,coalesced')] = ('','',
         secnames : array[TAsmSectiontype] of string[length('__DATA, __datacoal_nt,coalesced')] = ('','',
           '.text',
           '.text',
@@ -532,7 +558,9 @@ interface
           '.objc_nlclasslist',
           '.objc_nlclasslist',
           '.objc_catlist',
           '.objc_catlist',
           '.obcj_nlcatlist',
           '.obcj_nlcatlist',
-          '.objc_protolist'
+          '.objc_protolist',
+          '.stack',
+          '.heap'
         );
         );
       begin
       begin
         AsmLn;
         AsmLn;
@@ -543,8 +571,23 @@ interface
         if (atype in [sec_rodata,sec_rodata_norel]) and
         if (atype in [sec_rodata,sec_rodata_norel]) and
           (target_info.system=system_i386_go32v2) then
           (target_info.system=system_i386_go32v2) then
           AsmWrite('.data')
           AsmWrite('.data')
+        else if (atype=sec_user) then
+          AsmWrite(aname)
+        else if (atype=sec_threadvar) and
+          (target_info.system in (systems_windows+systems_wince)) then
+          AsmWrite('.tls'#9'bss')
         else if secnames[atype]='.text' then
         else if secnames[atype]='.text' then
-          AsmWrite(CodeSectionName)
+          AsmWrite(CodeSectionName(aname))
+{$ifdef i8086}
+        else if (target_info.system=system_i8086_msdos) and
+                (atype=sec_stack) and
+                (current_settings.x86memorymodel in x86_far_data_models) then
+          AsmWrite('stack stack class=stack align=16')
+        else if (target_info.system=system_i8086_msdos) and
+                (atype=sec_heap) and
+                (current_settings.x86memorymodel in x86_far_data_models) then
+          AsmWrite('heap class=heap align=16')
+{$endif i8086}
         else
         else
           AsmWrite(secnames[atype]);
           AsmWrite(secnames[atype]);
         if create_smartlink_sections and
         if create_smartlink_sections and
@@ -555,10 +598,10 @@ interface
             AsmWrite(aname);
             AsmWrite(aname);
           end;
           end;
         AsmLn;
         AsmLn;
-        LasTSecType:=atype;
+        LastSecType:=atype;
       end;
       end;
 
 
-    procedure T386NasmAssembler.WriteTree(p:TAsmList);
+    procedure TX86NasmAssembler.WriteTree(p:TAsmList);
 {$ifdef cpuextended}
 {$ifdef cpuextended}
     type
     type
       t80bitarray = array[0..9] of byte;
       t80bitarray = array[0..9] of byte;
@@ -571,7 +614,7 @@ interface
       i,j,l    : longint;
       i,j,l    : longint;
       InlineLevel : longint;
       InlineLevel : longint;
       consttype : taiconst_type;
       consttype : taiconst_type;
-      do_line,
+      do_line, SkipNewLine,
       quoted   : boolean;
       quoted   : boolean;
       co       : comp;
       co       : comp;
       sin      : single;
       sin      : single;
@@ -580,6 +623,7 @@ interface
       e        : extended;
       e        : extended;
 {$endif cpuextended}
 {$endif cpuextended}
       fixed_opcode: TAsmOp;
       fixed_opcode: TAsmOp;
+      prefix, LastSecName  : string;
     begin
     begin
       if not assigned(p) then
       if not assigned(p) then
        exit;
        exit;
@@ -625,16 +669,21 @@ interface
              begin
              begin
                if tai_section(hp).sectype<>sec_none then
                if tai_section(hp).sectype<>sec_none then
                  WriteSection(tai_section(hp).sectype,tai_section(hp).name^);
                  WriteSection(tai_section(hp).sectype,tai_section(hp).name^);
-               LasTSecType:=tai_section(hp).sectype;
+               LastSecType:=tai_section(hp).sectype;
              end;
              end;
 
 
            ait_align :
            ait_align :
              begin
              begin
-               { nasm gives warnings when it finds align in bss as it
-                 wants to store data }
-               if (lastsectype<>sec_bss) and
-                  (tai_align(hp).aligntype>1) then
-                 AsmWriteLn(#9'ALIGN '+tostr(tai_align(hp).aligntype));
+               if (tai_align(hp).aligntype>1) then
+                 begin
+                   if (LastSecType=sec_bss) or (
+                      (LastSecType=sec_threadvar) and
+                      (target_info.system in (systems_windows+systems_wince))
+                     ) then
+                      AsmWriteLn(#9'ALIGNB '+tostr(tai_align(hp).aligntype))
+                    else
+                      AsmWriteLn(#9'ALIGN '+tostr(tai_align(hp).aligntype));
+                 end;
              end;
              end;
 
 
            ait_datablock :
            ait_datablock :
@@ -654,7 +703,8 @@ interface
              begin
              begin
                consttype:=tai_const(hp).consttype;
                consttype:=tai_const(hp).consttype;
                case consttype of
                case consttype of
-                 aitconst_64bit :
+                 aitconst_64bit,
+                 aitconst_64bit_unaligned:
                     begin
                     begin
                       if assigned(tai_const(hp).sym) then
                       if assigned(tai_const(hp).sym) then
                         internalerror(200404292);
                         internalerror(200404292);
@@ -668,6 +718,8 @@ interface
                  aitconst_sleb128bit,
                  aitconst_sleb128bit,
                  aitconst_128bit:
                  aitconst_128bit:
                     begin
                     begin
+                      AsmWriteLn(target_asm.comment+'Unsupported const type '+
+                        ait_const2str[consttype]);
                     end;
                     end;
 {$ifdef i8086}
 {$ifdef i8086}
                  aitconst_farptr:
                  aitconst_farptr:
@@ -695,7 +747,9 @@ interface
                  aitconst_16bit,
                  aitconst_16bit,
                  aitconst_8bit,
                  aitconst_8bit,
                  aitconst_rva_symbol,
                  aitconst_rva_symbol,
-                 aitconst_secrel32_symbol :
+                 aitconst_secrel32_symbol,
+                 aitconst_16bit_unaligned,
+                 aitconst_32bit_unaligned:
                    begin
                    begin
                      AsmWrite(ait_const2str[tai_const(hp).consttype]);
                      AsmWrite(ait_const2str[tai_const(hp).consttype]);
                      l:=0;
                      l:=0;
@@ -952,6 +1006,13 @@ interface
                fixed_opcode:=taicpu(hp).FixNonCommutativeOpcodes;
                fixed_opcode:=taicpu(hp).FixNonCommutativeOpcodes;
                { We need intel order, no At&t }
                { We need intel order, no At&t }
                taicpu(hp).SetOperandOrder(op_intel);
                taicpu(hp).SetOperandOrder(op_intel);
+               { LOCK must be on same line as opcode }
+               if (taicpu(hp).ops = 0) and
+                   (fixed_opcode = A_LOCK) then
+                 SkipNewLine:=true
+               else
+                 SkipNewLine:=false;
+
                s:='';
                s:='';
                if ((fixed_opcode=A_FADDP) or
                if ((fixed_opcode=A_FADDP) or
                    (fixed_opcode=A_FMULP))
                    (fixed_opcode=A_FMULP))
@@ -963,10 +1024,33 @@ interface
                    taicpu(hp).oper[1]^.typ:=top_reg;
                    taicpu(hp).oper[1]^.typ:=top_reg;
                    taicpu(hp).oper[1]^.reg:=NR_ST;
                    taicpu(hp).oper[1]^.reg:=NR_ST;
                  end;
                  end;
+                 { NASM only accepts move for loading of
+                   simple symbol address }
+                  if ((taicpu(hp).opcode=A_LEA) and
+                      (taicpu(hp).ops=2) and
+                      (taicpu(hp).oper[0]^.typ=top_reg) and
+                      (reg2opsize(taicpu(hp).oper[0]^.reg) in [S_NO,S_Q]) and
+                      (taicpu(hp).oper[1]^.typ=top_ref) and
+                      (taicpu(hp).oper[1]^.ref^.refaddr<>addr_no) and
+                      assigned(taicpu(hp).oper[1]^.ref^.symbol) and
+                      (taicpu(hp).oper[1]^.ref^.base=NR_NO)) then
+                    begin
+                      AsmWrite(target_asm.comment);
+                      AsmWriteln('Converting LEA to MOV instruction');
+                      taicpu(hp).opcode:=A_MOV;
+                    end;
                if fixed_opcode=A_FWAIT then
                if fixed_opcode=A_FWAIT then
                 AsmWriteln(#9#9'DB'#9'09bh')
                 AsmWriteln(#9#9'DB'#9'09bh')
                else
                else
                 begin
                 begin
+                  prefix:='';
+{$ifdef i8086}
+                  { nickysn note: I don't know if the 187 requires FWAIT before
+                    every instruction like the 8087, so I'm including it just in case }
+                  if (current_settings.cputype<=cpu_186) and
+                      requires_fwait_on_8087(fixed_opcode) then
+                    prefix:='wait '+prefix;
+{$endif i8086}
 {$ifndef i8086}
 {$ifndef i8086}
                   { We need to explicitely set
                   { We need to explicitely set
                     word prefix to get selectors
                     word prefix to get selectors
@@ -978,13 +1062,13 @@ interface
                       (is_segment_reg(taicpu(hp).oper[0]^.reg)) then
                       (is_segment_reg(taicpu(hp).oper[0]^.reg)) then
                     AsmWriteln(#9#9'DB'#9'066h');
                     AsmWriteln(#9#9'DB'#9'066h');
 {$endif not i8086}
 {$endif not i8086}
-                  AsmWrite(#9#9+std_op2str[fixed_opcode]+cond2str[taicpu(hp).condition]);
+                  AsmWrite(#9#9+prefix+std_op2str[fixed_opcode]+cond2str[taicpu(hp).condition]);
                   if taicpu(hp).ops<>0 then
                   if taicpu(hp).ops<>0 then
                    begin
                    begin
                      if is_calljmp(fixed_opcode) then
                      if is_calljmp(fixed_opcode) then
                       begin
                       begin
                         AsmWrite(#9);
                         AsmWrite(#9);
-                        WriteOper_jmp(taicpu(hp).oper[0]^,fixed_opcode);
+                        WriteOper_jmp(taicpu(hp).oper[0]^,taicpu(hp));
                       end
                       end
                      else
                      else
                       begin
                       begin
@@ -998,7 +1082,8 @@ interface
                          end;
                          end;
                       end;
                       end;
                    end;
                    end;
-                  AsmLn;
+                  if not SkipNewLine then
+                    AsmLn;
                 end;
                 end;
              end;
              end;
 
 
@@ -1026,14 +1111,19 @@ interface
                     WriteHeader;
                     WriteHeader;
                   end;
                   end;
                { avoid empty files }
                { avoid empty files }
+                 LastSecType:=sec_none;
+                 LastSecName:='';
                  while assigned(hp.next) and (tai(hp.next).typ in [ait_cutobject,ait_section,ait_comment]) do
                  while assigned(hp.next) and (tai(hp.next).typ in [ait_cutobject,ait_section,ait_comment]) do
                   begin
                   begin
                     if tai(hp.next).typ=ait_section then
                     if tai(hp.next).typ=ait_section then
-                      lasTSectype:=tai_section(hp.next).sectype;
+                      begin
+                        LastSecType:=tai_section(hp.next).sectype;
+                        LastSecName:=tai_section(hp.next).name^;
+                      end;
                     hp:=tai(hp.next);
                     hp:=tai(hp.next);
                   end;
                   end;
-                 if lasTSectype<>sec_none then
-                   WriteSection(lasTSectype,'');
+                 if LastSecType<>sec_none then
+                   WriteSection(LastSecType,LastSecName);
                  AsmStartSize:=AsmSize;
                  AsmStartSize:=AsmSize;
                end;
                end;
              end;
              end;
@@ -1066,6 +1156,15 @@ interface
              end;
              end;
            ait_seh_directive :
            ait_seh_directive :
              { Ignore for now };
              { Ignore for now };
+           ait_varloc:
+             begin
+               if tai_varloc(hp).newlocationhi<>NR_NO then
+                 AsmWriteLn(target_asm.comment+'Var '+tai_varloc(hp).varsym.realname+' located in register '+
+                   std_regname(tai_varloc(hp).newlocationhi)+':'+std_regname(tai_varloc(hp).newlocation))
+               else
+                 AsmWriteLn(target_asm.comment+'Var '+tai_varloc(hp).varsym.realname+' located in register '+
+                   std_regname(tai_varloc(hp).newlocation));
+             end;
            else
            else
              internalerror(10000);
              internalerror(10000);
          end;
          end;
@@ -1074,7 +1173,7 @@ interface
     end;
     end;
 
 
 
 
-    procedure T386NasmAssembler.WriteExternals;
+    procedure TX86NasmAssembler.WriteExternals;
       var
       var
         sym : TAsmSymbol;
         sym : TAsmSymbol;
         i   : longint;
         i   : longint;
@@ -1087,7 +1186,7 @@ interface
           end;
           end;
       end;
       end;
 
 
-    procedure T386NasmAssembler.WriteSmartExternals;
+    procedure TX86NasmAssembler.WriteSmartExternals;
       var
       var
         EC : PExternChain;
         EC : PExternChain;
       begin
       begin
@@ -1100,9 +1199,9 @@ interface
           end;
           end;
       end;
       end;
 
 
-    procedure T386NasmAssembler.WriteHeader;
+    procedure TX86NasmAssembler.WriteHeader;
       begin
       begin
-{$ifdef i8086}
+{$if defined(i8086)}
       AsmWriteLn('BITS 16');
       AsmWriteLn('BITS 16');
       case current_settings.cputype of
       case current_settings.cputype of
         cpu_8086: AsmWriteLn('CPU 8086');
         cpu_8086: AsmWriteLn('CPU 8086');
@@ -1118,30 +1217,48 @@ interface
           internalerror(2013050101);
           internalerror(2013050101);
       end;
       end;
 
 
-      AsmWriteLn('SECTION ' + CodeSectionName + ' use16 class=code');
+      if not (cs_huge_code in current_settings.moduleswitches) then
+        AsmWriteLn('SECTION ' + CodeSectionName(current_module.modulename^) + ' use16 class=code');
+      { NASM complains if you put a missing section in the GROUP directive, so }
+      { we add empty declarations to make sure they exist, even if empty }
+      AsmWriteLn('SECTION .rodata class=data');
+      AsmWriteLn('SECTION .data class=data');
+      AsmWriteLn('SECTION .fpc class=data');
+      { WLINK requires class=bss in order to leave the BSS section out of the executable }
+      AsmWriteLn('SECTION .bss class=bss');
+      if (current_settings.x86memorymodel<>mm_tiny) and
+         (current_settings.x86memorymodel in x86_near_data_models) then
+        AsmWriteLn('SECTION stack stack class=stack align=16');
       if current_settings.x86memorymodel in x86_near_data_models then
       if current_settings.x86memorymodel in x86_near_data_models then
+        AsmWriteLn('SECTION heap class=heap align=16');
+      { group these sections in the same segment }
+      if current_settings.x86memorymodel=mm_tiny then
+        AsmWriteLn('GROUP dgroup text rodata data fpc bss heap')
+      else if current_settings.x86memorymodel in x86_near_data_models then
+        AsmWriteLn('GROUP dgroup rodata data fpc bss stack heap')
+      else
+        AsmWriteLn('GROUP dgroup rodata data fpc bss');
+      if paratargetdbg in [dbg_dwarf2,dbg_dwarf3,dbg_dwarf4] then
         begin
         begin
-          { NASM complains if you put a missing section in the GROUP directive, so }
-          { we add empty declarations to make sure they exist, even if empty }
-          AsmWriteLn('SECTION .rodata');
-          AsmWriteLn('SECTION .data');
-          AsmWriteLn('SECTION .fpc');
-          { WLINK requires class=bss in order to leave the BSS section out of the executable }
-          AsmWriteLn('SECTION .bss class=bss');
-          { group these sections in the same segment }
-          if current_settings.x86memorymodel=mm_tiny then
-            AsmWriteLn('GROUP dgroup text rodata data fpc bss')
-          else
-            AsmWriteLn('GROUP dgroup rodata data fpc bss');
+          AsmWriteLn('SECTION .debug_frame  use32 class=DWARF');
+          AsmWriteLn('SECTION .debug_info   use32 class=DWARF');
+          AsmWriteLn('SECTION .debug_line   use32 class=DWARF');
+          AsmWriteLn('SECTION .debug_abbrev use32 class=DWARF');
         end;
         end;
-      AsmWriteLn('SECTION ' + CodeSectionName);
-{$else i8086}
+      if not (cs_huge_code in current_settings.moduleswitches) then
+        AsmWriteLn('SECTION ' + CodeSectionName(current_module.modulename^));
+{$elseif defined(i386)}
       AsmWriteLn('BITS 32');
       AsmWriteLn('BITS 32');
-{$endif i8086}
+      using_relative:=false;
+{$elseif defined(x86_64)}
+      AsmWriteLn('BITS 64');
+      AsmWriteLn('default rel');
+      using_relative:=true;
+{$endif}
       end;
       end;
 
 
 
 
-    procedure T386NasmAssembler.WriteAsmList;
+    procedure TX86NasmAssembler.WriteAsmList;
     var
     var
       hal : tasmlisttype;
       hal : tasmlisttype;
     begin
     begin
@@ -1156,9 +1273,12 @@ interface
 
 
       for hal:=low(TasmlistType) to high(TasmlistType) do
       for hal:=low(TasmlistType) to high(TasmlistType) do
         begin
         begin
-          AsmWriteLn(target_asm.comment+'Begin asmlist '+AsmListTypeStr[hal]);
-          writetree(current_asmdata.asmlists[hal]);
-          AsmWriteLn(target_asm.comment+'End asmlist '+AsmListTypeStr[hal]);
+          if not (current_asmdata.asmlists[hal].empty) then
+            begin
+              AsmWriteLn(target_asm.comment+'Begin asmlist '+AsmListTypeStr[hal]);
+              writetree(current_asmdata.asmlists[hal]);
+              AsmWriteLn(target_asm.comment+'End asmlist '+AsmListTypeStr[hal]);
+            end;
         end;
         end;
 
 
       AsmLn;
       AsmLn;
@@ -1173,18 +1293,89 @@ interface
 {$endif EXTDEBUG}
 {$endif EXTDEBUG}
    end;
    end;
 
 
+    { Builds the command line used to invoke the external NASM binary.
+
+      Takes the command line produced by the inherited MakeCmdLine and
+      replaces the '$FORMAT' placeholder with the NASM output format name
+      selected from target_info.system.  Only the section matching the CPU
+      this compiler is built for (i8086, i386 or x86_64) is compiled in.
+      Every section ends in an else branch so that FormatName is always
+      assigned before it is substituted. }
+    function TX86NasmAssembler.MakeCmdLine: TCmdStr;
+      var
+        FormatName : string;
+      begin
+        result:=Inherited MakeCmdLine;
+{$ifdef i8086}
+        case target_info.system of
+          system_i8086_msdos:
+            FormatName:='obj';
+        else
+          { fallback, mirroring the i386/x86_64 sections below; without it
+            FormatName would be left unassigned for any other i8086 target }
+          FormatName:='obj';
+        end;
+{$endif i8086}
+{$ifdef i386}
+        case target_info.system of
+          system_i386_go32v2:
+            FormatName:='coff';
+          system_i386_wdosx,
+          system_i386_win32:
+            FormatName:='win32';
+          system_i386_embedded:
+            FormatName:='obj';
+          system_i386_linux,
+          system_i386_beos:
+            FormatName:='elf';
+          system_i386_darwin:
+            FormatName:='macho32';
+        else
+          FormatName:='elf';
+        end;
+{$endif i386}
+{$ifdef x86_64}
+        case target_info.system of
+          system_x86_64_win64:
+            FormatName:='win64';
+          system_x86_64_darwin:
+            FormatName:='macho64';
+          system_x86_64_linux:
+            FormatName:='elf64';
+        else
+          FormatName:='elf64';
+        end;
+{$endif x86_64}
+        { asmcmd templates that do not contain '$FORMAT' are left as they are }
+        Replace(result,'$FORMAT',FormatName);
+      end;
 
 
 {*****************************************************************************
 {*****************************************************************************
                                   Initialize
                                   Initialize
 *****************************************************************************}
 *****************************************************************************}
 
 
+{$ifdef i8086}
+    { Assembler descriptors compiled in only for i8086 builds.  Both entries
+      run the 'nasm' binary and list system_i8086_msdos as their only
+      supported target. }
+    const
+        { 'NASM' entry: the '$FORMAT' placeholder in asmcmd is substituted by
+          TX86NasmAssembler.MakeCmdLine with the format chosen for the target. }
+        as_i8086_nasm_info : tasminfo =
+          (
+            id           : as_i8086_nasm;
+            idtxt  : 'NASM';
+            asmbin : 'nasm';
+            asmcmd : '-f $FORMAT -o $OBJ -w-orphan-labels $EXTRAOPT $ASM';
+            supported_targets : [system_i8086_msdos];
+            flags : [af_needar,af_no_debug];
+            labelprefix : '..@';
+            comment : '; ';
+            dollarsign: '$';
+          );
+        { 'NASMOBJ' entry: same command line as the 'NASM' entry, except that
+          the output format is hard-coded as '-f obj' instead of '$FORMAT'. }
+        as_i8086_nasmobj_info : tasminfo =
+          (
+            id           : as_i8086_nasmobj;
+            idtxt  : 'NASMOBJ';
+            asmbin : 'nasm';
+            asmcmd : '-f obj -o $OBJ -w-orphan-labels $EXTRAOPT $ASM';
+            supported_targets : [system_i8086_msdos];
+            flags : [af_needar,af_no_debug];
+            labelprefix : '..@';
+            comment : '; ';
+            dollarsign: '$';
+          );
+{$endif i8086}
+{$ifdef i386}
     const
     const
-       as_i386_nasmcoff_info : tasminfo =
+        as_i386_nasmcoff_info : tasminfo =
           (
           (
             id           : as_i386_nasmcoff;
             id           : as_i386_nasmcoff;
             idtxt  : 'NASMCOFF';
             idtxt  : 'NASMCOFF';
             asmbin : 'nasm';
             asmbin : 'nasm';
-            asmcmd : '-f coff -o $OBJ -w-orphan-labels $ASM';
+            asmcmd : '-f coff -o $OBJ -w-orphan-labels $EXTRAOPT $ASM';
             supported_targets : [system_i386_go32v2];
             supported_targets : [system_i386_go32v2];
             flags : [af_needar,af_no_debug];
             flags : [af_needar,af_no_debug];
             labelprefix : '..@';
             labelprefix : '..@';
@@ -1197,7 +1388,7 @@ interface
             id           : as_i386_nasmwin32;
             id           : as_i386_nasmwin32;
             idtxt  : 'NASMWIN32';
             idtxt  : 'NASMWIN32';
             asmbin : 'nasm';
             asmbin : 'nasm';
-            asmcmd : '-f win32 -o $OBJ -w-orphan-labels $ASM';
+            asmcmd : '-f win32 -o $OBJ -w-orphan-labels $EXTRAOPT $ASM';
             supported_targets : [system_i386_win32];
             supported_targets : [system_i386_win32];
             flags : [af_needar,af_no_debug];
             flags : [af_needar,af_no_debug];
             labelprefix : '..@';
             labelprefix : '..@';
@@ -1210,7 +1401,7 @@ interface
             id           : as_i386_nasmobj;
             id           : as_i386_nasmobj;
             idtxt  : 'NASMOBJ';
             idtxt  : 'NASMOBJ';
             asmbin : 'nasm';
             asmbin : 'nasm';
-            asmcmd : '-f obj -o $OBJ -w-orphan-labels $ASM';
+            asmcmd : '-f obj -o $OBJ -w-orphan-labels $EXTRAOPT $ASM';
             supported_targets : [system_i386_embedded, system_i8086_msdos];
             supported_targets : [system_i386_embedded, system_i8086_msdos];
             flags : [af_needar,af_no_debug];
             flags : [af_needar,af_no_debug];
             labelprefix : '..@';
             labelprefix : '..@';
@@ -1223,7 +1414,7 @@ interface
             id           : as_i386_nasmwdosx;
             id           : as_i386_nasmwdosx;
             idtxt  : 'NASMWDOSX';
             idtxt  : 'NASMWDOSX';
             asmbin : 'nasm';
             asmbin : 'nasm';
-            asmcmd : '-f win32 -o $OBJ -w-orphan-labels $ASM';
+            asmcmd : '-f win32 -o $OBJ -w-orphan-labels $EXTRAOPT $ASM';
             supported_targets : [system_i386_wdosx];
             supported_targets : [system_i386_wdosx];
             flags : [af_needar,af_no_debug];
             flags : [af_needar,af_no_debug];
             labelprefix : '..@';
             labelprefix : '..@';
@@ -1237,7 +1428,7 @@ interface
             id           : as_i386_nasmelf;
             id           : as_i386_nasmelf;
             idtxt  : 'NASMELF';
             idtxt  : 'NASMELF';
             asmbin : 'nasm';
             asmbin : 'nasm';
-            asmcmd : '-f elf -o $OBJ -w-orphan-labels $ASM';
+            asmcmd : '-f elf -o $OBJ -w-orphan-labels $EXTRAOPT $ASM';
             supported_targets : [system_i386_linux];
             supported_targets : [system_i386_linux];
             flags : [af_needar,af_no_debug];
             flags : [af_needar,af_no_debug];
             labelprefix : '..@';
             labelprefix : '..@';
@@ -1245,12 +1436,25 @@ interface
             dollarsign: '$';
             dollarsign: '$';
           );
           );
 
 
+       as_i386_nasmdarwin_info : tasminfo = { NASM descriptor for system_i386_darwin, output format macho32 }
+          (
+            id           : as_i386_nasmdarwin;
+            idtxt  : 'NASMDARWIN';
+            asmbin : 'nasm';
+            asmcmd : '-f macho32 -o $OBJ -w-orphan-labels $EXTRAOPT $ASM'; { same option layout as the other NASM entries, only the -f value differs }
+            supported_targets : [system_i386_darwin];
+            flags : [af_needar,af_no_debug];
+            labelprefix : '..@';
+            comment : '; ';
+            dollarsign: '$';
+          );
+
        as_i386_nasmbeos_info : tasminfo =
        as_i386_nasmbeos_info : tasminfo =
           (
           (
             id           : as_i386_nasmbeos;
             id           : as_i386_nasmbeos;
             idtxt  : 'NASMELF';
             idtxt  : 'NASMELF';
             asmbin : 'nasm';
             asmbin : 'nasm';
-            asmcmd : '-f elf -o $OBJ -w-orphan-labels $ASM';
+            asmcmd : '-f elf -o $OBJ -w-orphan-labels $EXTRAOPT $ASM';
             supported_targets : [system_i386_beos];
             supported_targets : [system_i386_beos];
             flags : [af_needar,af_no_debug];
             flags : [af_needar,af_no_debug];
             labelprefix : '..@';
             labelprefix : '..@';
@@ -1263,21 +1467,104 @@ interface
             id           : as_i386_nasmhaiku;
             id           : as_i386_nasmhaiku;
             idtxt  : 'NASMELF';
             idtxt  : 'NASMELF';
             asmbin : 'nasm';
             asmbin : 'nasm';
-            asmcmd : '-f elf -o $OBJ -w-orphan-labels $ASM';
+            asmcmd : '-f elf -o $OBJ -w-orphan-labels $EXTRAOPT $ASM';
             supported_targets : [system_i386_haiku];
             supported_targets : [system_i386_haiku];
             flags : [af_needar,af_no_debug];
             flags : [af_needar,af_no_debug];
             labelprefix : '..@';
             labelprefix : '..@';
             comment : '; ';
             comment : '; ';
             dollarsign: '$';
             dollarsign: '$';
           );
           );
+       as_i386_nasm_info : tasminfo = { generic i386 NASM entry: not tied to one target or one object format }
+          (
+            id           : as_i386_nasm;
+            idtxt  : 'NASM';
+            asmbin : 'nasm';
+            asmcmd : '-f $FORMAT -o $OBJ -w-orphan-labels $EXTRAOPT $ASM'; { NOTE(review): $FORMAT expansion is not visible in this section - confirm where it is filled in }
+            supported_targets : [system_any]; { unlike the per-format entries, accepts any target }
+            flags : [af_needar,af_no_debug];
+            labelprefix : '..@';
+            comment : '; ';
+            dollarsign: '$';
+          );
+
+{$endif i386}
+{$ifdef x86_64} { NASM descriptors compiled in only when x86_64 is defined }
+    const
+       as_x86_64_nasm_info : tasminfo = { generic entry: any target, object format left as the $FORMAT placeholder }
+          (
+            id           : as_x86_64_nasm;
+            idtxt  : 'NASM';
+            asmbin : 'nasm';
+            asmcmd : '-f $FORMAT -o $OBJ -w-orphan-labels $EXTRAOPT $ASM';
+            supported_targets : [system_any];
+            flags : [af_needar{,af_no_debug}]; { af_no_debug is commented out here, unlike the other NASM entries visible in this section }
+            labelprefix : '..@';
+            comment : '; ';
+            dollarsign: '$';
+          );
+
+       as_x86_64_nasmwin64_info : tasminfo = { win64 object format for system_x86_64_win64 }
+          (
+            id           : as_x86_64_nasmwin64;
+            idtxt  : 'NASMWIN64';
+            asmbin : 'nasm';
+            asmcmd : '-f win64 -o $OBJ -w-orphan-labels $EXTRAOPT $ASM';
+            supported_targets : [system_x86_64_win64];
+            flags : [af_needar,af_no_debug];
+            labelprefix : '..@';
+            comment : '; ';
+            dollarsign: '$';
+          );
+
+       as_x86_64_nasmelf_info : tasminfo = { elf64 object format for system_x86_64_linux }
+          (
+            id           : as_x86_64_nasmelf;
+            idtxt  : 'NASMELF';
+            asmbin : 'nasm';
+            asmcmd : '-f elf64 -o $OBJ -w-orphan-labels $EXTRAOPT $ASM';
+            supported_targets : [system_x86_64_linux];
+            flags : [af_needar,af_no_debug];
+            labelprefix : '..@';
+            comment : '; ';
+            dollarsign: '$';
+          );
+
+
+       as_x86_64_nasmdarwin_info : tasminfo = { macho64 object format for system_x86_64_darwin }
+          (
+            id           : as_x86_64_nasmdarwin;
+            idtxt  : 'NASMDARWIN';
+            asmbin : 'nasm';
+            asmcmd : '-f macho64 -o $OBJ -w-orphan-labels $EXTRAOPT $ASM';
+            supported_targets : [system_x86_64_darwin];
+            flags : [af_needar,af_no_debug];
+            labelprefix : '..@';
+            comment : '; ';
+            dollarsign: '$';
+          );
+
+{$endif x86_64}
 
 
 
 
 initialization
 initialization
-  RegisterAssembler(as_i386_nasmcoff_info,T386NasmAssembler);
-  RegisterAssembler(as_i386_nasmwin32_info,T386NasmAssembler);
-  RegisterAssembler(as_i386_nasmwdosx_info,T386NasmAssembler);
-  RegisterAssembler(as_i386_nasmobj_info,T386NasmAssembler);
-  RegisterAssembler(as_i386_nasmbeos_info,T386NasmAssembler);
-  RegisterAssembler(as_i386_nasmhaiku_info,T386NasmAssembler);
-  RegisterAssembler(as_i386_nasmelf_info,T386NasmAssembler);
+  { Register every NASM descriptor declared above for the CPU this compiler
+    is built for; all of them are served by TX86NasmAssembler. }
+{$ifdef i8086}
+  RegisterAssembler(as_i8086_nasm_info,TX86NasmAssembler);
+  RegisterAssembler(as_i8086_nasmobj_info,TX86NasmAssembler);
+{$endif i8086}
+{$ifdef i386}
+  RegisterAssembler(as_i386_nasmcoff_info,TX86NasmAssembler);
+  RegisterAssembler(as_i386_nasmwin32_info,TX86NasmAssembler);
+  RegisterAssembler(as_i386_nasmwdosx_info,TX86NasmAssembler);
+  RegisterAssembler(as_i386_nasmobj_info,TX86NasmAssembler);
+  RegisterAssembler(as_i386_nasmbeos_info,TX86NasmAssembler);
+  RegisterAssembler(as_i386_nasmhaiku_info,TX86NasmAssembler);
+  RegisterAssembler(as_i386_nasmelf_info,TX86NasmAssembler);
+  { as_i386_nasmdarwin_info is declared for system_i386_darwin, so it has to
+    be registered too, mirroring the x86_64 Darwin entry further down. }
+  RegisterAssembler(as_i386_nasmdarwin_info,TX86NasmAssembler);
+  RegisterAssembler(as_i386_nasm_info,TX86NasmAssembler);
+{$endif i386}
+{$ifdef x86_64}
+  RegisterAssembler(as_x86_64_nasm_info,TX86NasmAssembler);
+  RegisterAssembler(as_x86_64_nasmwin64_info,TX86NasmAssembler);
+  RegisterAssembler(as_x86_64_nasmelf_info,TX86NasmAssembler);
+  RegisterAssembler(as_x86_64_nasmdarwin_info,TX86NasmAssembler);
+{$endif x86_64}
 end.
 end.

+ 7 - 0
compiler/x86/cga.pas

@@ -44,6 +44,7 @@ interface
 
 
     procedure emit_const_reg_reg(i : tasmop;s : topsize;c : longint;reg1,reg2 : tregister);
     procedure emit_const_reg_reg(i : tasmop;s : topsize;c : longint;reg1,reg2 : tregister);
     procedure emit_reg_reg_reg(i : tasmop;s : topsize;reg1,reg2,reg3 : tregister);
     procedure emit_reg_reg_reg(i : tasmop;s : topsize;reg1,reg2,reg3 : tregister);
+    procedure emit_ref_reg_reg(i : tasmop;s : topsize;ref : treference;reg1,reg2 : tregister);
 
 
 
 
     procedure emit_sym(i : tasmop;s : topsize;op : tasmsymbol);
     procedure emit_sym(i : tasmop;s : topsize;op : tasmsymbol);
@@ -124,6 +125,12 @@ implementation
          current_asmdata.CurrAsmList.concat(Taicpu.Op_reg_reg_reg(i,s,reg1,reg2,reg3));
          current_asmdata.CurrAsmList.concat(Taicpu.Op_reg_reg_reg(i,s,reg1,reg2,reg3));
       end;
       end;
 
 
+    procedure emit_ref_reg_reg(i : tasmop;s : topsize;ref : treference;reg1,reg2 : tregister); { appends instruction i of size s with operands ref,reg1,reg2 to the current assembler list }
+      begin
+        tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,ref); { ref is a value parameter, so whatever make_simple_ref changes stays local to this call }
+        current_asmdata.CurrAsmList.concat(Taicpu.Op_ref_reg_reg(i,s,ref,reg1,reg2));
+      end;
+
     procedure emit_sym(i : tasmop;s : topsize;op : tasmsymbol);
     procedure emit_sym(i : tasmop;s : topsize;op : tasmsymbol);
       begin
       begin
         current_asmdata.CurrAsmList.concat(Taicpu.Op_sym(i,s,op));
         current_asmdata.CurrAsmList.concat(Taicpu.Op_sym(i,s,op));

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 513 - 158
compiler/x86/cgx86.pas


+ 43 - 4
compiler/x86/cpubase.pas

@@ -68,6 +68,7 @@ uses
 
 
    const
    const
       { Integer Super registers }
       { Integer Super registers }
+      RS_NO         = $ffffffff;
       RS_RAX        = $00;      {EAX}
       RS_RAX        = $00;      {EAX}
       RS_RCX        = $01;      {ECX}
       RS_RCX        = $01;      {ECX}
       RS_RDX        = $02;      {EDX}
       RS_RDX        = $02;      {EDX}
@@ -246,8 +247,16 @@ uses
     type
     type
       TResFlags = (F_E,F_NE,F_G,F_L,F_GE,F_LE,F_C,F_NC,
       TResFlags = (F_E,F_NE,F_G,F_L,F_GE,F_LE,F_C,F_NC,
                    F_A,F_AE,F_B,F_BE,
                    F_A,F_AE,F_B,F_BE,
-                   F_S,F_NS,F_O,F_NO);
+                   F_S,F_NS,F_O,F_NO,
+                   { For IEEE-compliant floating-point compares,
+                     same as normal counterparts but additionally check PF }
+                   F_FE,F_FNE,F_FA,F_FAE,F_FB,F_FBE);
 
 
+    const
+      FPUFlags = [F_FE,F_FNE,F_FA,F_FAE,F_FB,F_FBE]; { set of all floating-point-compare members of TResFlags }
+      FPUFlags2Flags: array[F_FE..F_FBE] of TResFlags = ( { maps each F_Fxx flag to the plain flag with the same condition name }
+        F_E,F_NE,F_A,F_AE,F_B,F_BE
+      );
 
 
 {*****************************************************************************
 {*****************************************************************************
                                  Constants
                                  Constants
@@ -292,6 +301,12 @@ uses
 {$ifdef i8086}
 {$ifdef i8086}
     { returns the next virtual register }
     { returns the next virtual register }
     function GetNextReg(const r : TRegister) : TRegister;
     function GetNextReg(const r : TRegister) : TRegister;
+
+    { Returns whether an extra FWAIT instruction must be emitted before the
+      given instruction when targeting the i8087. This covers almost all x87
+      instructions; those that either always or never carry a built-in FWAIT
+      prefix are excluded (e.g. FINIT, FNINIT, etc.). }
+    function requires_fwait_on_8087(op: TAsmOp): boolean;
 {$endif i8086}
 {$endif i8086}
 
 
 implementation
 implementation
@@ -471,7 +486,8 @@ implementation
         inv_flags: array[TResFlags] of TResFlags =
         inv_flags: array[TResFlags] of TResFlags =
           (F_NE,F_E,F_LE,F_GE,F_L,F_G,F_NC,F_C,
           (F_NE,F_E,F_LE,F_GE,F_L,F_G,F_NC,F_C,
            F_BE,F_B,F_AE,F_A,
            F_BE,F_B,F_AE,F_A,
-           F_NS,F_S,F_NO,F_O);
+           F_NS,F_S,F_NO,F_O,
+           F_FNE,F_FE,F_FBE,F_FB,F_FAE,F_FA);
       begin
       begin
         f:=inv_flags[f];
         f:=inv_flags[f];
       end;
       end;
@@ -480,9 +496,12 @@ implementation
     function flags_to_cond(const f: TResFlags) : TAsmCond;
     function flags_to_cond(const f: TResFlags) : TAsmCond;
       const
       const
         flags_2_cond : array[TResFlags] of TAsmCond =
         flags_2_cond : array[TResFlags] of TAsmCond =
-          (C_E,C_NE,C_G,C_L,C_GE,C_LE,C_C,C_NC,C_A,C_AE,C_B,C_BE,C_S,C_NS,C_O,C_NO);
+          (C_E,C_NE,C_G,C_L,C_GE,C_LE,C_C,C_NC,C_A,C_AE,C_B,C_BE,C_S,C_NS,C_O,C_NO,
+           C_None,C_None,C_None,C_None,C_None,C_None);
       begin
       begin
         result := flags_2_cond[f];
         result := flags_2_cond[f];
+        if (result=C_None) then
+          InternalError(2014041301);
       end;
       end;
 
 
 
 
@@ -583,7 +602,9 @@ implementation
               { the remaining are distinct from each other }
               { the remaining are distinct from each other }
               exit(false);
               exit(false);
             end;
             end;
-          mm_compact,mm_large,mm_huge: internalerror(2013062303);
+          mm_compact,mm_large,mm_huge:
+            { all segment registers are different in these models }
+            exit(false);
           else
           else
             internalerror(2013062302);
             internalerror(2013062302);
         end;
         end;
@@ -605,6 +626,24 @@ implementation
           internalerror(2013051401);
           internalerror(2013051401);
         result:=TRegister(longint(r)+1);
         result:=TRegister(longint(r)+1);
       end;
       end;
+
+    function requires_fwait_on_8087(op: TAsmOp): boolean; { true exactly when op is one of the x87 opcodes enumerated in the case label below }
+      begin
+        case op of
+            A_F2XM1,A_FABS,A_FADD,A_FADDP,A_FBLD,A_FBSTP,A_FCHS,A_FCOM,A_FCOMP,
+            A_FCOMPP,A_FDECSTP,A_FDIV,A_FDIVP,A_FDIVR,A_FDIVRP,
+            A_FFREE,A_FIADD,A_FICOM,A_FICOMP,A_FIDIV,A_FIDIVR,A_FILD,
+            A_FIMUL,A_FINCSTP,A_FIST,A_FISTP,A_FISUB,A_FISUBR,A_FLD,A_FLD1,
+            A_FLDCW,A_FLDENV,A_FLDL2E,A_FLDL2T,A_FLDLG2,A_FLDLN2,A_FLDPI,A_FLDZ,
+            A_FMUL,A_FMULP,A_FNOP,A_FPATAN,A_FPREM,A_FPTAN,A_FRNDINT,
+            A_FRSTOR,A_FSCALE,A_FSQRT,A_FST,
+            A_FSTP,A_FSUB,A_FSUBP,A_FSUBR,A_FSUBRP,A_FTST,
+            A_FXAM,A_FXCH,A_FXTRACT,A_FYL2X,A_FYL2XP1:
+              result:=true; { listed opcode: caller should emit an explicit FWAIT first }
+          else
+            result:=false; { every opcode not listed above, x87 or otherwise }
+        end;
+      end;
 {$endif i8086}
 {$endif i8086}
 
 
 
 

+ 69 - 0
compiler/x86/ni86mem.pas

@@ -0,0 +1,69 @@
+{
+    Copyright (c) 1998-2002 by Florian Klaempfl
+
+    Generate i386/i8086 assembler for memory related nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ni86mem;
+
+{$i fpcdefs.inc}
+
+interface
+    uses
+      globtype,
+      cgbase,cpuinfo,cpubase,
+      node,nmem,ncgmem,nx86mem;
+
+    type
+      ti86addrnode = class(tcgaddrnode) { address-of node shared by i386/i8086; adds handling for absolute variables flagged absseg }
+       protected
+        procedure set_absvarsym_resultdef; virtual; abstract; { supplied by descendants; called by typecheck_non_proc for an absseg absolute variable }
+        function typecheck_non_proc(realsource: tnode; out res: tnode): boolean; override; { see the implementation section of this unit }
+      end;
+
+implementation
+
+    uses
+      cutils,verbose,
+      aasmtai,aasmdata,
+      cgutils,cgobj,
+      nld,
+      symconst,symdef,symcpu;
+
+{*****************************************************************************
+                           TI86ADDRNODE
+*****************************************************************************}
+
+  function ti86addrnode.typecheck_non_proc(realsource: tnode; out res: tnode): boolean; { handles the absseg absolute-variable case itself, defers everything else to the inherited method }
+    begin
+      res:=nil; { this override never produces a replacement node on its own path }
+      { if we are getting the address of an absolute sym, check whether it's
+        a near or a far pointer }
+      if (realsource.nodetype=loadn) and
+         ((tloadnode(realsource).symtableentry.typ=absolutevarsym) and
+         tcpuabsolutevarsym(tloadnode(realsource).symtableentry).absseg) then
+        begin
+          set_absvarsym_resultdef; { abstract hook: the descendant chooses the result type }
+          result:=true;
+        end
+      else
+        result:=inherited; { any other source node: inherited typecheck decides the result }
+    end;
+
+
+end.

+ 197 - 138
compiler/x86/nx86add.pas

@@ -35,13 +35,14 @@ unit nx86add;
       tx86addnode = class(tcgaddnode)
       tx86addnode = class(tcgaddnode)
       protected
       protected
         function  getresflags(unsigned : boolean) : tresflags;
         function  getresflags(unsigned : boolean) : tresflags;
+        function  getfpuresflags : tresflags;
         procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
         procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
-        procedure check_left_and_right_fpureg(force_fpureg: boolean);
+        procedure force_left_and_right_fpureg;
+        procedure prepare_x87_locations(out refnode: tnode);
         procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
         procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
         procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
         procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
 
 
-        procedure second_cmpfloatsse;
-        procedure second_cmpfloatavx;
+        procedure second_cmpfloatvector;
 
 
         procedure second_addfloatsse;
         procedure second_addfloatsse;
         procedure second_addfloatavx;
         procedure second_addfloatavx;
@@ -65,7 +66,7 @@ unit nx86add;
   implementation
   implementation
 
 
     uses
     uses
-      globtype,globals,
+      globtype,globals,systems,
       verbose,cutils,
       verbose,cutils,
       cpuinfo,
       cpuinfo,
       aasmbase,aasmtai,aasmdata,aasmcpu,
       aasmbase,aasmtai,aasmdata,aasmcpu,
@@ -85,6 +86,7 @@ unit nx86add;
         power : longint;
         power : longint;
         hl4   : tasmlabel;
         hl4   : tasmlabel;
         r     : Tregister;
         r     : Tregister;
+        href  : treference;
       begin
       begin
         { at this point, left.location.loc should be LOC_REGISTER }
         { at this point, left.location.loc should be LOC_REGISTER }
         if right.location.loc=LOC_REGISTER then
         if right.location.loc=LOC_REGISTER then
@@ -129,7 +131,14 @@ unit nx86add;
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=0) then
                   (right.location.value=0) then
                  begin
                  begin
-                   emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
+                { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
+                   spilling, while 'test %reg,%reg' still requires loading into register.
+                   If spilling is not necessary, it is changed back into 'test %reg,%reg' by
+                   peephole optimizer (this optimization is currently available only for i386). }
+                   if (target_info.cpu=cpu_i386) then
+                     emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
+                   else  
+                     emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                  end
                  end
                else
                else
                  if (op=A_ADD) and
                  if (op=A_ADD) and
@@ -156,6 +165,18 @@ unit nx86add;
                   begin
                   begin
                     emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                     emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                   end
                   end
+                else if (op=A_IMUL) and
+                    (right.location.loc=LOC_CONSTANT) and
+                    (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
+                    (power in [1..3]) and
+                    not(cs_check_overflow in current_settings.localswitches) then
+                  begin
+                    reference_reset_base(href,left.location.register,0,0);
+                    href.index:=left.location.register;
+                    href.scalefactor:=int64(right.location.value)-1;
+                    left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
+                    current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
+                  end
                else
                else
                  begin
                  begin
                    if extra_not then
                    if extra_not then
@@ -210,7 +231,7 @@ unit nx86add;
               { maybe we can reuse a constant register when the
               { maybe we can reuse a constant register when the
                 operation is a comparison that doesn't change the
                 operation is a comparison that doesn't change the
                 value of the register }
                 value of the register }
-              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,(nodetype in [ltn,lten,gtn,gten,equaln,unequaln]));
+                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,(nodetype in [ltn,lten,gtn,gten,equaln,unequaln]));
             end;
             end;
           end;
           end;
         if (right.location.loc<>LOC_CONSTANT) and
         if (right.location.loc<>LOC_CONSTANT) and
@@ -222,25 +243,21 @@ unit nx86add;
        end;
        end;
 
 
 
 
-    procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
+    procedure tx86addnode.force_left_and_right_fpureg;
       begin
       begin
         if (right.location.loc<>LOC_FPUREGISTER) then
         if (right.location.loc<>LOC_FPUREGISTER) then
-         begin
-           if (force_fpureg) then
-             begin
-               location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
-                if (left.location.loc<>LOC_FPUREGISTER) then
-                  location_force_fpureg(current_asmdata.CurrAsmList,left.location,false)
-                else
-                  { left was on the stack => swap }
-                  toggleflag(nf_swapped);
-             end
-         end
+          begin
+            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
+            if (left.location.loc<>LOC_FPUREGISTER) then
+              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
+            else
+              { left was on the stack => swap }
+              toggleflag(nf_swapped);
+          end
         { the nominator in st0 }
         { the nominator in st0 }
         else if (left.location.loc<>LOC_FPUREGISTER) then
         else if (left.location.loc<>LOC_FPUREGISTER) then
           begin
           begin
-            if (force_fpureg) then
-              location_force_fpureg(current_asmdata.CurrAsmList,left.location,false)
+            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
           end
           end
         else
         else
           begin
           begin
@@ -250,6 +267,55 @@ unit nx86add;
       end;
       end;
 
 
 
 
+    { Prepares both operands for an x87 instruction.
+      At most one operand is left in memory: a LOC_REFERENCE/LOC_CREFERENCE of
+      size OS_F32 or OS_F64 is returned in 'refnode'; everything else is loaded
+      onto the FPU stack, in which case 'refnode' stays nil.
+      nf_swapped is toggled by the branches below to record operand order.
+      Raises an internal error when the operand considered for 'refnode' is
+      not a memory reference. }
+    procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
+      begin
+        refnode:=nil;
+        case ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER) of { number of operands already on the FPU stack }
+          0:
+            begin
+              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false); { right is always loaded in this case }
+              if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
+                InternalError(2013090803);
+              if (left.location.size in [OS_F32,OS_F64]) then
+                begin
+                  refnode:=left; { left stays in memory and becomes the memory operand }
+                  toggleflag(nf_swapped);
+                end
+              else
+                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
+            end;
+          1:
+            begin   { if left is on the stack then swap. }
+              if (left.location.loc=LOC_FPUREGISTER) then
+                refnode:=right
+              else
+                refnode:=left;
+              if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
+                InternalError(2013090801);
+              if not (refnode.location.size in [OS_F32,OS_F64]) then
+                begin
+                  hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false); { other sizes are loaded instead of being used as a memory operand }
+                  if (refnode=right) then
+                    toggleflag(nf_swapped);
+                  refnode:=nil; { both operands are now on the FPU stack }
+                end
+              else
+                begin
+                  if (refnode=left) then
+                    toggleflag(nf_swapped);
+                end;
+            end;
+          2: { fpu operands are always in the wrong order on the stack }
+            toggleflag(nf_swapped);
+        else
+          InternalError(2013090802);
+        end;
+      end;
+
+
     procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
     procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
 {$ifdef x86_64}
 {$ifdef x86_64}
       var
       var
@@ -304,6 +370,8 @@ unit nx86add;
                      lten : getresflags:=F_GE;
                      lten : getresflags:=F_GE;
                      gtn : getresflags:=F_L;
                      gtn : getresflags:=F_L;
                      gten : getresflags:=F_LE;
                      gten : getresflags:=F_LE;
+                     else
+                       internalerror(2013120105);
                   end
                   end
                 else
                 else
                   case nodetype of
                   case nodetype of
@@ -311,6 +379,8 @@ unit nx86add;
                      lten : getresflags:=F_LE;
                      lten : getresflags:=F_LE;
                      gtn : getresflags:=F_G;
                      gtn : getresflags:=F_G;
                      gten : getresflags:=F_GE;
                      gten : getresflags:=F_GE;
+                     else
+                       internalerror(2013120106);
                   end;
                   end;
              end
              end
            else
            else
@@ -321,6 +391,8 @@ unit nx86add;
                      lten : getresflags:=F_AE;
                      lten : getresflags:=F_AE;
                      gtn : getresflags:=F_B;
                      gtn : getresflags:=F_B;
                      gten : getresflags:=F_BE;
                      gten : getresflags:=F_BE;
+                     else
+                       internalerror(2013120107);
                   end
                   end
                 else
                 else
                   case nodetype of
                   case nodetype of
@@ -328,12 +400,40 @@ unit nx86add;
                      lten : getresflags:=F_BE;
                      lten : getresflags:=F_BE;
                      gtn : getresflags:=F_A;
                      gtn : getresflags:=F_A;
                      gten : getresflags:=F_AE;
                      gten : getresflags:=F_AE;
+                     else
+                       internalerror(2013120108);
                   end;
                   end;
              end;
              end;
          end;
          end;
       end;
       end;
 
 
 
 
+    function tx86addnode.getfpuresflags : tresflags; { maps this comparison node's nodetype to one of the F_Fxx floating-point result flags }
+      begin
+        if (nodetype=equaln) then
+          result:=F_FE
+        else if (nodetype=unequaln) then
+          result:=F_FNE
+        else if (nf_swapped in flags) then { operands are exchanged, so below/above are mirrored }
+          case nodetype of
+            ltn : result:=F_FA;
+            lten : result:=F_FAE;
+            gtn : result:=F_FB;
+            gten : result:=F_FBE;
+          else
+            internalerror(2014031402); { not a comparison node type }
+          end
+        else
+          case nodetype of
+            ltn : result:=F_FB;
+            lten : result:=F_FBE;
+            gtn : result:=F_FA;
+            gten : result:=F_FAE;
+          else
+            internalerror(2014031403); { not a comparison node type }
+          end;
+      end;
+
 {*****************************************************************************
 {*****************************************************************************
                                 AddSmallSet
                                 AddSmallSet
 *****************************************************************************}
 *****************************************************************************}
@@ -699,7 +799,9 @@ unit nx86add;
           end;
           end;
 
 
         pass_left_right;
         pass_left_right;
-        check_left_and_right_fpureg(false);
+        { fpu operands are always in reversed order on the stack }
+        if (left.location.loc=LOC_FPUREGISTER) and (right.location.loc=LOC_FPUREGISTER) then
+          toggleflag(nf_swapped);
 
 
         if (nf_swapped in flags) then
         if (nf_swapped in flags) then
           { can't use swapleftright if both are on the fpu stack, since then }
           { can't use swapleftright if both are on the fpu stack, since then }
@@ -838,7 +940,9 @@ unit nx86add;
 {$endif dummy}
 {$endif dummy}
 
 
         pass_left_right;
         pass_left_right;
-        check_left_and_right_fpureg(false);
+        { fpu operands are always in reversed order on the stack }
+        if (left.location.loc=LOC_FPUREGISTER) and (right.location.loc=LOC_FPUREGISTER) then
+          toggleflag(nf_swapped);
 
 
         if (nf_swapped in flags) then
         if (nf_swapped in flags) then
           { can't use swapleftright if both are on the fpu stack, since then }
           { can't use swapleftright if both are on the fpu stack, since then }
@@ -968,94 +1072,33 @@ unit nx86add;
       end;
       end;
 
 
 
 
-    procedure tx86addnode.second_cmpfloatsse;
+    procedure tx86addnode.second_cmpfloatvector;
       var
       var
         op : tasmop;
         op : tasmop;
+      const
+        ops_single: array[boolean] of tasmop = (A_COMISS,A_VCOMISS);
+        ops_double: array[boolean] of tasmop = (A_COMISD,A_VCOMISD);
       begin
       begin
         if is_single(left.resultdef) then
         if is_single(left.resultdef) then
-          op:=A_COMISS
+          op:=ops_single[UseAVX]
         else if is_double(left.resultdef) then
         else if is_double(left.resultdef) then
-          op:=A_COMISD
+          op:=ops_double[UseAVX]
         else
         else
           internalerror(200402222);
           internalerror(200402222);
         pass_left_right;
         pass_left_right;
 
 
-        location_reset(location,LOC_FLAGS,def_cgsize(resultdef));
-        { we can use only right as left operand if the operation is commutative }
-        if (right.location.loc=LOC_MMREGISTER) then
-          begin
-            { force floating point reg. location to be written to memory,
-              we don't force it to mm register because writing to memory
-              allows probably shorter code because there is no direct fpu->mm register
-              copy instruction
-            }
-            if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
-              hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
-            case left.location.loc of
-              LOC_REFERENCE,LOC_CREFERENCE:
-                begin
-                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
-                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,left.location.reference,right.location.register));
-                end;
-              LOC_MMREGISTER,LOC_CMMREGISTER:
-                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,left.location.register,right.location.register));
-              else
-                internalerror(200402221);
-            end;
-            if nf_swapped in flags then
-              exclude(flags,nf_swapped)
-            else
-              include(flags,nf_swapped)
-          end
-        else
-          begin
-            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
-            { force floating point reg. location to be written to memory,
-              we don't force it to mm register because writing to memory
-              allows probably shorter code because there is no direct fpu->mm register
-              copy instruction
-            }
-            if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
-              hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
-            case right.location.loc of
-              LOC_REFERENCE,LOC_CREFERENCE:
-                begin
-                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
-                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,right.location.reference,left.location.register));
-                end;
-              LOC_MMREGISTER,LOC_CMMREGISTER:
-                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,right.location.register,left.location.register));
-              else
-                internalerror(200402223);
-            end;
-          end;
-        location.resflags:=getresflags(true);
-      end;
-
+        location_reset(location,LOC_FLAGS,OS_NO);
 
 
-    procedure tx86addnode.second_cmpfloatavx;
-      var
-        op : tasmop;
-      begin
-        if is_single(left.resultdef) then
-          op:=A_VCOMISS
-        else if is_double(left.resultdef) then
-          op:=A_VCOMISD
-        else
-          internalerror(200402222);
-        pass_left_right;
+        { Direct move fpu->mm register is not possible, so force any fpu operands to
+          memory (not to mm registers because one of the memory locations can be used
+          directly in compare instruction, yielding shorter code) }
+        if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
+          hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
+        if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
+          hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
 
 
-        location_reset(location,LOC_FLAGS,def_cgsize(resultdef));
-        { we can use only right as left operand if the operation is commutative }
-        if (right.location.loc=LOC_MMREGISTER) then
+        if (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
           begin
           begin
-            { force floating point reg. location to be written to memory,
-              we don't force it to mm register because writing to memory
-              allows probably shorter code because there is no direct fpu->mm register
-              copy instruction
-            }
-            if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
-              hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
             case left.location.loc of
             case left.location.loc of
               LOC_REFERENCE,LOC_CREFERENCE:
               LOC_REFERENCE,LOC_CREFERENCE:
                 begin
                 begin
@@ -1067,21 +1110,11 @@ unit nx86add;
               else
               else
                 internalerror(200402221);
                 internalerror(200402221);
             end;
             end;
-            if nf_swapped in flags then
-              exclude(flags,nf_swapped)
-            else
-              include(flags,nf_swapped)
+            toggleflag(nf_swapped);
           end
           end
         else
         else
           begin
           begin
             hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
             hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
-            { force floating point reg. location to be written to memory,
-              we don't force it to mm register because writing to memory
-              allows probably shorter code because there is no direct fpu->mm register
-              copy instruction
-            }
-            if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
-              hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
             case right.location.loc of
             case right.location.loc of
               LOC_REFERENCE,LOC_CREFERENCE:
               LOC_REFERENCE,LOC_CREFERENCE:
                 begin
                 begin
@@ -1094,7 +1127,9 @@ unit nx86add;
                 internalerror(200402223);
                 internalerror(200402223);
             end;
             end;
           end;
           end;
-        location.resflags:=getresflags(true);
+        location.resflags:=getfpuresflags;
+        location_freetemp(current_asmdata.CurrAsmList,left.location);
+        location_freetemp(current_asmdata.CurrAsmList,right.location);
       end;
       end;
 
 
 
 
@@ -1145,8 +1180,17 @@ unit nx86add;
 
 
 
 
     procedure tx86addnode.second_addfloat;
     procedure tx86addnode.second_addfloat;
+      const
+        ops_add:  array[boolean] of TAsmOp = (A_FADDP,A_FADD);
+        ops_mul:  array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
+        ops_sub:  array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
+        ops_rsub: array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
+        ops_div:  array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
+        ops_rdiv: array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
       var
       var
         op : TAsmOp;
         op : TAsmOp;
+        refnode : tnode;
+        hasref : boolean;
       begin
       begin
         if use_vectorfpu(resultdef) then
         if use_vectorfpu(resultdef) then
           begin
           begin
@@ -1158,34 +1202,36 @@ unit nx86add;
           end;
           end;
 
 
         pass_left_right;
         pass_left_right;
+        prepare_x87_locations(refnode);
+        hasref:=assigned(refnode);
 
 
         case nodetype of
         case nodetype of
           addn :
           addn :
-            op:=A_FADDP;
+            op:=ops_add[hasref];
           muln :
           muln :
-            op:=A_FMULP;
+            op:=ops_mul[hasref];
           subn :
           subn :
-            op:=A_FSUBP;
+            if (nf_swapped in flags) then
+              op:=ops_rsub[hasref]
+            else
+              op:=ops_sub[hasref];
           slashn :
           slashn :
-            op:=A_FDIVP;
+            if (nf_swapped in flags) then
+              op:=ops_rdiv[hasref]
+            else
+              op:=ops_div[hasref];
           else
           else
             internalerror(2003042214);
             internalerror(2003042214);
         end;
         end;
 
 
-        check_left_and_right_fpureg(true);
-
-        { if we swaped the tree nodes, then use the reverse operator }
-        if nf_swapped in flags then
+        if hasref then
+          emit_ref(op,tcgsize2opsize[refnode.location.size],refnode.location.reference)
+        else
           begin
           begin
-             if (nodetype=slashn) then
-               op:=A_FDIVRP
-             else if (nodetype=subn) then
-               op:=A_FSUBRP;
+            emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
+            tcgx86(cg).dec_fpu_stack;
           end;
           end;
 
 
-        emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
-        tcgx86(cg).dec_fpu_stack;
-
         location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
         location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
         location.register:=NR_ST;
         location.register:=NR_ST;
       end;
       end;
@@ -1199,15 +1245,12 @@ unit nx86add;
       begin
       begin
         if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
         if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
           begin
           begin
-            if UseAVX then
-              second_cmpfloatavx
-            else
-              second_cmpfloatsse;
+            second_cmpfloatvector;
             exit;
             exit;
           end;
           end;
 
 
         pass_left_right;
         pass_left_right;
-        check_left_and_right_fpureg(true);
+        force_left_and_right_fpureg;
 
 
 {$ifndef x86_64}
 {$ifndef x86_64}
         if current_settings.cputype<cpu_Pentium2 then
         if current_settings.cputype<cpu_Pentium2 then
@@ -1221,9 +1264,11 @@ unit nx86add;
             if current_settings.cputype < cpu_286 then
             if current_settings.cputype < cpu_286 then
               begin
               begin
                 tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,tmpref);
                 tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,tmpref);
-                emit_ref(A_FNSTSW,S_NO,tmpref);
+                emit_ref(A_FSTSW,S_NO,tmpref);
                 cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
                 cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
-                emit_ref_reg(A_MOV,S_W,tmpref,NR_AX);
+                inc(tmpref.offset);
+                emit_ref_reg(A_MOV,S_B,tmpref,NR_AH);
+                dec(tmpref.offset);
                 emit_none(A_SAHF,S_NO);
                 emit_none(A_SAHF,S_NO);
                 cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
                 cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
                 tg.ungettemp(current_asmdata.CurrAsmList,tmpref);
                 tg.ungettemp(current_asmdata.CurrAsmList,tmpref);
@@ -1248,7 +1293,7 @@ unit nx86add;
           end;
           end;
 
 
         location_reset(location,LOC_FLAGS,OS_NO);
         location_reset(location,LOC_FLAGS,OS_NO);
-        location.resflags:=getresflags(true);
+        location.resflags:=getfpuresflags;
       end;
       end;
 
 
 
 
@@ -1295,11 +1340,25 @@ unit nx86add;
 
 
          pass_left_right;
          pass_left_right;
 
 
-         left_must_be_reg(opdef,opsize,false);
-         emit_generic_code(A_CMP,opsize,unsigned,false,false);
-         location_freetemp(current_asmdata.CurrAsmList,right.location);
-         location_freetemp(current_asmdata.CurrAsmList,left.location);
-
+         if (right.location.loc=LOC_CONSTANT) and
+            (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE])
+{$ifdef x86_64}
+              and ((not (opsize in [OS_64,OS_S64])) or (
+              (right.location.value>=low(longint)) and (right.location.value<=high(longint))
+            ))
+{$endif x86_64}
+         then
+           begin
+             emit_const_ref(A_CMP, TCGSize2Opsize[opsize], right.location.value, left.location.reference);
+             location_freetemp(current_asmdata.CurrAsmList,left.location);
+           end
+         else
+           begin
+             left_must_be_reg(opdef,opsize,false);
+             emit_generic_code(A_CMP,opsize,unsigned,false,false);
+             location_freetemp(current_asmdata.CurrAsmList,right.location);
+             location_freetemp(current_asmdata.CurrAsmList,left.location);
+           end;
          location_reset(location,LOC_FLAGS,OS_NO);
          location_reset(location,LOC_FLAGS,OS_NO);
          location.resflags:=getresflags(unsigned);
          location.resflags:=getresflags(unsigned);
       end;
       end;

+ 16 - 1
compiler/x86/nx86cal.pas

@@ -29,6 +29,7 @@ interface
 
 
     uses
     uses
       symdef,
       symdef,
+      cgutils,
       ncgcal;
       ncgcal;
 
 
     type
     type
@@ -39,6 +40,8 @@ interface
         protected
         protected
          procedure do_release_unused_return_value;override;
          procedure do_release_unused_return_value;override;
          procedure set_result_location(realresdef: tstoreddef);override;
          procedure set_result_location(realresdef: tstoreddef);override;
+         function can_call_ref(var ref: treference):boolean;override;
+         procedure do_call_ref(ref: treference);override;
        end;
        end;
 
 
 
 
@@ -46,7 +49,7 @@ implementation
 
 
     uses
     uses
       cgobj,
       cgobj,
-      cgbase,cgutils,cpubase,cgx86,cga;
+      cgbase,cpubase,cgx86,cga,aasmdata,aasmcpu;
 
 
 
 
 {*****************************************************************************
 {*****************************************************************************
@@ -81,4 +84,16 @@ implementation
     end;
     end;
 
 
 
 
+  function tx86callnode.can_call_ref(var ref: treference): boolean;
+    begin
+      tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,ref);
+      result:=true;
+    end;
+
+
+  procedure tx86callnode.do_call_ref(ref: treference);
+    begin
+      current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_CALL,S_NO,ref));
+    end;
+
 end.
 end.

+ 5 - 7
compiler/x86/nx86cnv.pas

@@ -389,8 +389,9 @@ implementation
                 begin
                 begin
                    { unsigned 64 bit ints are harder to handle:
                    { unsigned 64 bit ints are harder to handle:
                      we load bits 0..62 and then check bit 63:
                      we load bits 0..62 and then check bit 63:
-                     if it is 1 then we add $80000000 000000000
-                     as double                                  }
+                     if it is 1 then we add 2**64 as float.
+                     Since 2**64 can be represented exactly, use a single-precision
+                     constant to save space. }
                    current_asmdata.getdatalabel(l1);
                    current_asmdata.getdatalabel(l1);
                    current_asmdata.getjumplabel(l2);
                    current_asmdata.getjumplabel(l2);
     
     
@@ -430,13 +431,10 @@ implementation
                    new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,l1.name,const_align(sizeof(pint)));
                    new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,l1.name,const_align(sizeof(pint)));
                    current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
                    current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
                    { I got this constant from a test program (FK) }
                    { I got this constant from a test program (FK) }
-                   current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_32bit(0));
-                   current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_32bit(longint ($80000000)));
-                   current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_32bit($0000403f));
+                   current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_32bit($5f800000));
                    reference_reset_symbol(href,l1,0,4);
                    reference_reset_symbol(href,l1,0,4);
                    tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,href);
                    tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,href);
-                   current_asmdata.CurrAsmList.concat(Taicpu.Op_ref(A_FLD,S_FX,href));
-                   current_asmdata.CurrAsmList.concat(Taicpu.Op_reg_reg(A_FADDP,S_NO,NR_ST,NR_ST1));
+                   current_asmdata.CurrAsmList.concat(Taicpu.Op_ref(A_FADD,S_FS,href));
                    cg.a_label(current_asmdata.CurrAsmList,l2);
                    cg.a_label(current_asmdata.CurrAsmList,l2);
                 end
                 end
               else
               else

+ 281 - 30
compiler/x86/nx86inl.pas

@@ -45,6 +45,7 @@ interface
           function first_round_real: tnode; override;
           function first_round_real: tnode; override;
           function first_trunc_real: tnode; override;
           function first_trunc_real: tnode; override;
           function first_popcnt: tnode; override;
           function first_popcnt: tnode; override;
+          function first_fma: tnode; override;
           { second pass override to generate these nodes }
           { second pass override to generate these nodes }
           procedure second_IncludeExclude;override;
           procedure second_IncludeExclude;override;
           procedure second_pi; override;
           procedure second_pi; override;
@@ -64,6 +65,7 @@ interface
           procedure second_abs_long;override;
           procedure second_abs_long;override;
 {$endif not i8086}
 {$endif not i8086}
           procedure second_popcnt;override;
           procedure second_popcnt;override;
+          procedure second_fma;override;
        private
        private
           procedure load_fpu_location(lnode: tnode);
           procedure load_fpu_location(lnode: tnode);
        end;
        end;
@@ -77,7 +79,7 @@ implementation
       symconst,
       symconst,
       defutil,
       defutil,
       aasmbase,aasmtai,aasmdata,aasmcpu,
       aasmbase,aasmtai,aasmdata,aasmcpu,
-      symtype,symdef,
+      symtype,symdef,symcpu,
       cgbase,pass_2,
       cgbase,pass_2,
       cpuinfo,cpubase,paramgr,
       cpuinfo,cpubase,paramgr,
       nbas,ncon,ncal,ncnv,nld,ncgutil,
       nbas,ncon,ncal,ncnv,nld,ncgutil,
@@ -91,15 +93,34 @@ implementation
 
 
      function tx86inlinenode.first_pi : tnode;
      function tx86inlinenode.first_pi : tnode;
       begin
       begin
-        expectloc:=LOC_FPUREGISTER;
-        first_pi := nil;
+        if (tfloatdef(pbestrealtype^).floattype=s80real) then
+          begin
+            expectloc:=LOC_FPUREGISTER;
+            first_pi := nil;
+          end
+        else
+          result:=inherited;
       end;
       end;
 
 
 
 
      function tx86inlinenode.first_arctan_real : tnode;
      function tx86inlinenode.first_arctan_real : tnode;
       begin
       begin
-        expectloc:=LOC_FPUREGISTER;
-        first_arctan_real := nil;
+{$ifdef i8086}
+        { FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
+          so we need to use the RTL helper on these FPUs }
+        if current_settings.cputype < cpu_386 then
+          begin
+            result := inherited;
+            exit;
+          end;
+{$endif i8086}
+        if (tfloatdef(pbestrealtype^).floattype=s80real) then
+          begin
+            expectloc:=LOC_FPUREGISTER;
+            first_arctan_real := nil;
+          end
+        else
+          result:=inherited;
       end;
       end;
 
 
      function tx86inlinenode.first_abs_real : tnode;
      function tx86inlinenode.first_abs_real : tnode;
@@ -113,20 +134,31 @@ implementation
 
 
      function tx86inlinenode.first_sqr_real : tnode;
      function tx86inlinenode.first_sqr_real : tnode;
       begin
       begin
-        expectloc:=LOC_FPUREGISTER;
+        if use_vectorfpu(resultdef) then
+          expectloc:=LOC_MMREGISTER
+        else
+          expectloc:=LOC_FPUREGISTER;
         first_sqr_real := nil;
         first_sqr_real := nil;
       end;
       end;
 
 
      function tx86inlinenode.first_sqrt_real : tnode;
      function tx86inlinenode.first_sqrt_real : tnode;
       begin
       begin
-        expectloc:=LOC_FPUREGISTER;
+        if use_vectorfpu(resultdef) then
+          expectloc:=LOC_MMREGISTER
+        else
+          expectloc:=LOC_FPUREGISTER;
         first_sqrt_real := nil;
         first_sqrt_real := nil;
       end;
       end;
 
 
      function tx86inlinenode.first_ln_real : tnode;
      function tx86inlinenode.first_ln_real : tnode;
       begin
       begin
-        expectloc:=LOC_FPUREGISTER;
-        first_ln_real := nil;
+        if (tfloatdef(pbestrealtype^).floattype=s80real) then
+          begin
+            expectloc:=LOC_FPUREGISTER;
+            first_ln_real := nil;
+          end
+        else
+          result:=inherited;
       end;
       end;
 
 
      function tx86inlinenode.first_cos_real : tnode;
      function tx86inlinenode.first_cos_real : tnode;
@@ -139,8 +171,13 @@ implementation
             exit;
             exit;
           end;
           end;
 {$endif i8086}
 {$endif i8086}
-        expectloc:=LOC_FPUREGISTER;
-        first_cos_real := nil;
+        if (tfloatdef(pbestrealtype^).floattype=s80real) then
+          begin
+            expectloc:=LOC_FPUREGISTER;
+            result:=nil;
+          end
+        else
+          result:=inherited;
       end;
       end;
 
 
      function tx86inlinenode.first_sin_real : tnode;
      function tx86inlinenode.first_sin_real : tnode;
@@ -153,8 +190,13 @@ implementation
             exit;
             exit;
           end;
           end;
 {$endif i8086}
 {$endif i8086}
-        expectloc:=LOC_FPUREGISTER;
-        first_sin_real := nil;
+        if (tfloatdef(pbestrealtype^).floattype=s80real) then
+          begin
+            expectloc:=LOC_FPUREGISTER;
+            result:=nil;
+          end
+        else
+          result:=inherited;
       end;
       end;
 
 
 
 
@@ -194,18 +236,35 @@ implementation
      function tx86inlinenode.first_popcnt: tnode;
      function tx86inlinenode.first_popcnt: tnode;
        begin
        begin
          Result:=nil;
          Result:=nil;
-         if (current_settings.fputype<fpu_sse42)
-{$ifdef i386}
-           or is_64bit(left.resultdef)
-{$endif i386}
+{$ifndef i8086}
+         if (CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
+  {$ifdef i386}
+            and not is_64bit(left.resultdef)
+  {$endif i386}
            then
            then
+             expectloc:=LOC_REGISTER
+         else
+{$endif not i8086}
            Result:=inherited first_popcnt
            Result:=inherited first_popcnt
+       end;
+
+
+     function tx86inlinenode.first_fma : tnode;
+       begin
+{$ifndef i8086}
+         if ((cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[]) and
+           ((is_double(resultdef)) or (is_single(resultdef))) then
+           begin
+             expectloc:=LOC_MMREGISTER;
+             Result:=nil;
+           end
          else
          else
-           expectloc:=LOC_REGISTER;
+{$endif i8086}
+           Result:=inherited first_fma;
        end;
        end;
 
 
 
 
-     procedure tx86inlinenode.second_Pi;
+     procedure tx86inlinenode.second_pi;
        begin
        begin
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
          emit_none(A_FLDPI,S_NO);
          emit_none(A_FLDPI,S_NO);
@@ -236,7 +295,7 @@ implementation
            LOC_MMREGISTER,LOC_CMMREGISTER:
            LOC_MMREGISTER,LOC_CMMREGISTER:
              begin
              begin
                location:=lnode.location;
                location:=lnode.location;
-               location_force_fpureg(current_asmdata.CurrAsmList,location,false);
+               hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,resultdef,false);
              end;
              end;
            else
            else
              internalerror(309991);
              internalerror(309991);
@@ -259,18 +318,39 @@ implementation
          if use_vectorfpu(resultdef) then
          if use_vectorfpu(resultdef) then
            begin
            begin
              secondpass(left);
              secondpass(left);
-             hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
-             location:=left.location;
+             if left.location.loc<>LOC_MMREGISTER then
+               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
+             if UseAVX then
+               begin
+                 location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+               end
+             else
+               location:=left.location;
              case tfloatdef(resultdef).floattype of
              case tfloatdef(resultdef).floattype of
                s32real:
                s32real:
-                 reference_reset_symbol(href,current_asmdata.RefAsmSymbol('FPC_ABSMASK_SINGLE'),0,4);
+                 begin
+                   reference_reset_symbol(href,current_asmdata.RefAsmSymbol(target_info.cprefix+'FPC_ABSMASK_SINGLE'),0,4);
+                   tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, href);
+                   if UseAVX then
+                     current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
+                       A_VANDPS,S_XMM,href,left.location.register,location.register))
+                   else
+                     current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_ANDPS,S_XMM,href,location.register));
+                 end;
                s64real:
                s64real:
-                 reference_reset_symbol(href,current_asmdata.RefAsmSymbol('FPC_ABSMASK_DOUBLE'),0,4);
+                 begin
+                   reference_reset_symbol(href,current_asmdata.RefAsmSymbol(target_info.cprefix+'FPC_ABSMASK_DOUBLE'),0,4);
+                   tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, href);
+                   if UseAVX then
+                     current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
+                       A_VANDPD,S_XMM,href,left.location.register,location.register))
+                   else
+                     current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_ANDPD,S_XMM,href,location.register))
+                 end;
                else
                else
                  internalerror(200506081);
                  internalerror(200506081);
              end;
              end;
-             tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, href);
-             current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_ANDPS,S_XMM,href,location.register))
            end
            end
          else
          else
            begin
            begin
@@ -367,8 +447,19 @@ implementation
               begin
               begin
                 tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,oldcw);
                 tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,oldcw);
                 tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,newcw);
                 tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,newcw);
-                emit_ref(A_FNSTCW,S_NO,newcw);
-                emit_ref(A_FNSTCW,S_NO,oldcw);
+{$ifdef i8086}
+                if current_settings.cputype<=cpu_286 then
+                  begin
+                    emit_ref(A_FSTCW,S_NO,newcw);
+                    emit_ref(A_FSTCW,S_NO,oldcw);
+                    emit_none(A_FWAIT,S_NO);
+                  end
+                else
+{$endif i8086}
+                  begin
+                    emit_ref(A_FNSTCW,S_NO,newcw);
+                    emit_ref(A_FNSTCW,S_NO,oldcw);
+                  end;
                 emit_const_ref(A_OR,S_W,$0f00,newcw);
                 emit_const_ref(A_OR,S_W,$0f00,newcw);
                 load_fpu_location(left);
                 load_fpu_location(left);
                 emit_ref(A_FLDCW,S_NO,newcw);
                 emit_ref(A_FLDCW,S_NO,newcw);
@@ -663,8 +754,168 @@ implementation
         location_reset(location,LOC_REGISTER,opsize);
         location_reset(location,LOC_REGISTER,opsize);
         location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
         location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
         if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
         if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
-          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.register,location.register))
+          emit_reg_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.register,location.register)
         else
         else
-          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.reference,location.register));
+          emit_ref_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.reference,location.register);
       end;
       end;
+
+
+    procedure tx86inlinenode.second_fma; { emits one scalar fused multiply-add instruction for the three call parameters p1,p2,p3: (+/-)(p1*p2) (+/-) p3 }
+      const
+        op : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp = { indexed as op[negproduct,negop3,float type,operand arrangement] }
+          (
+           { positive product }
+           (
+            { positive third operand }
+            ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
+             (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
+            ),
+            { negative third operand }
+            ((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
+             (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
+            )
+           ),
+           { negative product }
+           (
+            { positive third operand }
+            ((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
+             (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
+            ),
+            { negative third operand }
+            ((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
+             (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
+            )
+           )
+          );
+
+      var
+        paraarray : array[1..3] of tnode;
+        memop,
+        i : integer;
+        negop3,
+        negproduct,
+        gotmem : boolean;
+        hp : tnode;
+      begin
+{$ifndef i8086}
+         if (cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[] then { NOTE(review): FMA4-only targets pass this test too, but the table above only holds 231/213 (FMA3) opcodes - confirm this is intended }
+           begin
+             negop3:=false;
+             negproduct:=false;
+             paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue; { last node of the nextpara chain; presumably the first source-level argument - confirm }
+             paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
+             paraarray[3]:=tcallparanode(parameters).paravalue;
+
+             { check if a neg. node can be removed
+               this is possible because changing the sign of
+               a floating point number does not affect its absolute
+               value in any way, so the negation is folded into the opcode choice
+             }
+             if paraarray[1].nodetype=unaryminusn then
+               begin
+                 paraarray[1]:=tunarynode(paraarray[1]).left;
+                 { do not release the unused unary minus node, it is kept and released together with the other nodes,
+                   only no code is generated for it }
+                 negproduct:=not(negproduct);
+               end;
+
+             if paraarray[2].nodetype=unaryminusn then
+               begin
+                 paraarray[2]:=tunarynode(paraarray[2]).left;
+                 { do not release the unused unary minus node, it is kept and released together with the other nodes,
+                   only no code is generated for it; negating both factors toggles negproduct twice, i.e. cancels out }
+                 negproduct:=not(negproduct);
+               end;
+
+             if paraarray[3].nodetype=unaryminusn then
+               begin
+                 paraarray[3]:=tunarynode(paraarray[3]).left;
+                 { do not release the unused unary minus node, it is kept and released together with the other nodes,
+                   only no code is generated for it }
+                 negop3:=true;
+               end;
+
+              for i:=1 to 3 do
+               secondpass(paraarray[i]);
+
+             { only one memory operand is allowed: the first parameter found in a (C)REFERENCE location
+               is kept in memory, every other parameter not yet in an mm register is forced into one }
+             gotmem:=false;
+             memop:=0;
+             for i:=1 to 3 do
+               begin
+                 if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
+                   begin
+                     if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
+                       begin
+                         memop:=i;
+                         gotmem:=true;
+                       end
+                     else
+                       hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
+                   end;
+               end;
+
+             location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
+             location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+
+             if gotmem then
+               begin
+                 case memop of
+                   1: { p1 in memory: result:=p3, then the 231 form uses the memory operand and p2 as factors and accumulates into result }
+                     begin
+                       hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
+                         paraarray[3].location.register,location.register,mms_movescalar);
+                       emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
+                         paraarray[1].location.reference,paraarray[2].location.register,location.register);
+                     end;
+                   2: { p2 in memory: result:=p3, then the 231 form uses the memory operand and p1 as factors and accumulates into result }
+                     begin
+                       hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
+                         paraarray[3].location.register,location.register,mms_movescalar);
+                       emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
+                         paraarray[2].location.reference,paraarray[1].location.register,location.register);
+                     end;
+                   3: { p3 in memory: result:=p1, then the 213 form multiplies result by p2 and uses the memory operand as addend }
+                     begin
+                       hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
+                         paraarray[1].location.register,location.register,mms_movescalar);
+                       emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
+                         paraarray[3].location.reference,paraarray[2].location.register,location.register);
+                     end
+                   else
+                     internalerror(2014041301);
+                 end;
+               end
+             else
+               begin
+                 { try to use the location which is already in a temp. mm register as destination,
+                   so the compiler might be able to re-use the register;
+                   table column 3 selects the 213 form (result holds a factor), column 0 the 231 form (result holds the addend) }
+                 if paraarray[1].location.loc=LOC_MMREGISTER then
+                   begin
+                     hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
+                       paraarray[1].location.register,location.register,mms_movescalar);
+                     emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
+                       paraarray[3].location.register,paraarray[2].location.register,location.register);
+                   end
+                 else if paraarray[2].location.loc=LOC_MMREGISTER then
+                   begin
+                     hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
+                       paraarray[2].location.register,location.register,mms_movescalar);
+                     emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
+                       paraarray[3].location.register,paraarray[1].location.register,location.register);
+                   end
+                 else
+                   begin
+                     hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
+                       paraarray[3].location.register,location.register,mms_movescalar);
+                     emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,0],S_NO,
+                       paraarray[1].location.register,paraarray[2].location.register,location.register);
+                   end;
+               end;
+           end
+         else
+{$endif i8086}
+           internalerror(2014032301);
+      end;
+
 end.
 end.

+ 263 - 26
compiler/x86/nx86mat.pas

@@ -44,11 +44,15 @@ interface
 {$endif SUPPORT_MMX}
 {$endif SUPPORT_MMX}
       end;
       end;
 
 
+      tx86moddivnode = class(tcgmoddivnode) { x86 integer div/mod node: replaces the inherited code generation pass }
+         procedure pass_generate_code;override;
+      end;
+
   implementation
   implementation
 
 
     uses
     uses
       globtype,
       globtype,
-      systems,
+      systems,constexp,
       cutils,verbose,globals,
       cutils,verbose,globals,
       symconst,symdef,
       symconst,symdef,
       aasmbase,aasmtai,aasmdata,defutil,
       aasmbase,aasmtai,aasmdata,defutil,
@@ -176,13 +180,13 @@ interface
             end;
             end;
 
 
             reference_reset_symbol(href,l1,0,resultdef.alignment);
             reference_reset_symbol(href,l1,0,resultdef.alignment);
-            reg:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
-            cg.a_loadmm_ref_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),def_cgsize(resultdef),href,reg,mms_movescalar);
 
 
             if UseAVX then
             if UseAVX then
-              cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,reg,left.location.register,location.register,nil)
+              cg.a_opmm_ref_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,href,left.location.register,location.register,nil)
             else
             else
               begin
               begin
+                reg:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+                cg.a_loadmm_ref_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),def_cgsize(resultdef),href,reg,mms_movescalar);
                 cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),def_cgsize(resultdef),left.location.register,location.register,mms_movescalar);
                 cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),def_cgsize(resultdef),left.location.register,location.register,mms_movescalar);
                 cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,reg,location.register,nil);
                 cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,reg,location.register,nil);
               end;
               end;
@@ -221,28 +225,12 @@ interface
 
 
     procedure tx86notnode.second_boolean;
     procedure tx86notnode.second_boolean;
       var
       var
-         hl : tasmlabel;
          opsize : tcgsize;
          opsize : tcgsize;
+         hreg: tregister;
       begin
       begin
         opsize:=def_cgsize(resultdef);
         opsize:=def_cgsize(resultdef);
 
 
-        if left.expectloc=LOC_JUMP then
-         begin
-           location_reset(location,LOC_JUMP,OS_NO);
-           hl:=current_procinfo.CurrTrueLabel;
-           current_procinfo.CurrTrueLabel:=current_procinfo.CurrFalseLabel;
-           current_procinfo.CurrFalseLabel:=hl;
-           secondpass(left);
-
-            if left.location.loc<>LOC_JUMP then
-              internalerror(2012081307);
-
-           maketojumpbool(current_asmdata.CurrAsmList,left,lr_load_regvars);
-           hl:=current_procinfo.CurrTrueLabel;
-           current_procinfo.CurrTrueLabel:=current_procinfo.CurrFalseLabel;
-           current_procinfo.CurrFalseLabel:=hl;
-         end
-        else
+        if not handle_locjump then
          begin
          begin
            { the second pass could change the location of left }
            { the second pass could change the location of left }
            { if it is a register variable, so we've to do      }
            { if it is a register variable, so we've to do      }
@@ -255,18 +243,80 @@ interface
                  location.resflags:=left.location.resflags;
                  location.resflags:=left.location.resflags;
                  inverse_flags(location.resflags);
                  inverse_flags(location.resflags);
                end;
                end;
+             LOC_CREFERENCE,
+             LOC_REFERENCE:
+               begin
+{$if defined(cpu32bitalu)}
+                 if is_64bit(resultdef) then
+                   begin
+                     hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_32);
+                     tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
+                     cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_32,OS_32,left.location.reference,hreg);
+                     inc(left.location.reference.offset,4);
+                     cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_32,left.location.reference,hreg);
+                   end
+                 else
+{$elseif defined(cpu16bitalu)}
+                 if is_64bit(resultdef) then
+                   begin
+                     hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
+                     tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
+                     cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
+                     inc(left.location.reference.offset,2);
+                     cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
+                     inc(left.location.reference.offset,2);
+                     cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
+                     inc(left.location.reference.offset,2);
+                     cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
+                   end
+                 else if is_32bit(resultdef) then
+                   begin
+                     hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
+                     tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
+                     cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
+                     inc(left.location.reference.offset,2);
+                     cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
+                   end
+                 else
+{$endif}
+                   emit_const_ref(A_CMP, TCGSize2Opsize[opsize], 0, left.location.reference);
+                 location_reset(location,LOC_FLAGS,OS_NO);
+                 location.resflags:=F_E;
+               end;
              LOC_CONSTANT,
              LOC_CONSTANT,
              LOC_REGISTER,
              LOC_REGISTER,
              LOC_CREGISTER,
              LOC_CREGISTER,
-             LOC_REFERENCE,
-             LOC_CREFERENCE,
              LOC_SUBSETREG,
              LOC_SUBSETREG,
              LOC_CSUBSETREG,
              LOC_CSUBSETREG,
              LOC_SUBSETREF,
              LOC_SUBSETREF,
              LOC_CSUBSETREF :
              LOC_CSUBSETREF :
                begin
                begin
-                 hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true);
-                 emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
+{$if defined(cpu32bitalu)}
+                 if is_64bit(resultdef) then
+                   begin
+                     hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
+                     emit_reg_reg(A_OR,S_L,left.location.register64.reghi,left.location.register64.reglo);
+                   end
+                 else
+{$elseif defined(cpu16bitalu)}
+                 if is_64bit(resultdef) then
+                   begin
+                     hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
+                     emit_reg_reg(A_OR,S_W,GetNextReg(left.location.register64.reghi),left.location.register64.reghi);
+                     emit_reg_reg(A_OR,S_W,GetNextReg(left.location.register64.reglo),left.location.register64.reglo);
+                     emit_reg_reg(A_OR,S_W,left.location.register64.reghi,left.location.register64.reglo);
+                   end
+                 else if is_32bit(resultdef) then
+                   begin
+                     hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
+                     emit_reg_reg(A_OR,S_L,GetNextReg(left.location.register),left.location.register);
+                   end
+                 else
+{$endif}
+                   begin
+                     hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true);
+                     emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
+                   end;
                  location_reset(location,LOC_FLAGS,OS_NO);
                  location_reset(location,LOC_FLAGS,OS_NO);
                  location.resflags:=F_E;
                  location.resflags:=F_E;
                end;
                end;
@@ -314,4 +364,191 @@ interface
       emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
       emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
     end;
     end;
 {$endif SUPPORT_MMX}
 {$endif SUPPORT_MMX}
+
+
+{*****************************************************************************
+                             TX86MODDIVNODE
+*****************************************************************************}
+
+    procedure tx86moddivnode.pass_generate_code; { emits x86 code for integer div/mod; "div" by an ordinal constant becomes shifts or a multiply, everything else uses DIV/IDIV }
+      var
+        hreg1,hreg2,rega,regd:Tregister;
+        power:longint;
+        op:Tasmop;
+        cgsize:TCgSize;
+        opsize:topsize;
+        e, sm: aint;
+        d,m: aword;
+        m_add: boolean;
+        s: byte;
+      begin
+        secondpass(left);
+        if codegenerror then
+          exit;
+        secondpass(right);
+        if codegenerror then
+          exit;
+
+        { put numerator in register; only 32 and 64 bit results are handled here,
+          rega/regd are the (R/E)AX and (R/E)DX registers sized to match the result }
+        cgsize:=def_cgsize(resultdef);
+        opsize:=TCGSize2OpSize[cgsize];
+        if not (cgsize in [OS_32,OS_S32,OS_64,OS_S64]) then
+          InternalError(2013102702);
+        rega:=newreg(R_INTREGISTER,RS_EAX,cgsize2subreg(R_INTREGISTER,cgsize));
+        regd:=newreg(R_INTREGISTER,RS_EDX,cgsize2subreg(R_INTREGISTER,cgsize));
+
+        location_reset(location,LOC_REGISTER,cgsize);
+        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
+        hreg1:=left.location.register;
+
+        if (nodetype=divn) and (right.nodetype=ordconstn) then
+          begin
+            if ispowerof2(int64(tordconstnode(right).value),power) then
+              begin
+                { for signed numbers, the numerator must be adjusted before the
+                  shift instruction, but not with unsigned numbers! Otherwise,
+                  "Cardinal($ffffffff) div 16" overflows! (JM) }
+                if is_signed(left.resultdef) Then
+                  begin
+                    { use a sequence without jumps, saw this in
+                      comp.compilers (JM) }
+                    { no jumps, but more operations }
+                    hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+                    emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
+                    {If the left value is negative, hreg2 has all bits set (e.g. $ffffffff), otherwise 0.}
+                    emit_const_reg(A_SAR,opsize,resultdef.size*8-1,hreg2);
+                    {If negative, hreg2=right value-1, otherwise 0.}
+                    { (don't use emit_const_reg, because if value>high(longint)
+                       then it must first be loaded into a register) }
+                    cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,cgsize,tordconstnode(right).value-1,hreg2);
+                    { add to the left value }
+                    emit_reg_reg(A_ADD,opsize,hreg2,hreg1);
+                    { do the shift }
+                    emit_const_reg(A_SAR,opsize,power,hreg1);
+                  end
+                else
+                  emit_const_reg(A_SHR,opsize,power,hreg1);
+                location.register:=hreg1;
+              end
+            else
+              begin
+                if is_signed(left.resultdef) then
+                  begin
+                    e:=tordconstnode(right).value.svalue;
+                    calc_divconst_magic_signed(resultdef.size*8,e,sm,s);
+                    cg.getcpuregister(current_asmdata.CurrAsmList,rega);
+                    emit_const_reg(A_MOV,opsize,sm,rega);
+                    cg.getcpuregister(current_asmdata.CurrAsmList,regd);
+                    emit_reg(A_IMUL,opsize,hreg1);
+                    { only the high half of result is used (left in regd), so rega can be released }
+                    cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
+                    { add or subtract dividend }
+                    if (e>0) and (sm<0) then
+                      emit_reg_reg(A_ADD,opsize,hreg1,regd)
+                    else if (e<0) and (sm>0) then
+                      emit_reg_reg(A_SUB,opsize,hreg1,regd);
+                    { shift if necessary }
+                    if (s<>0) then
+                      emit_const_reg(A_SAR,opsize,s,regd);
+                    { extract and add the sign bit }
+                    if (e<0) then
+                      emit_reg_reg(A_MOV,opsize,regd,hreg1);
+                    { if e>=0, hreg1 still contains dividend }
+                    emit_const_reg(A_SHR,opsize,left.resultdef.size*8-1,hreg1);
+                    emit_reg_reg(A_ADD,opsize,hreg1,regd);
+                    cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
+                    location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+                    cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)
+                  end
+                else
+                  begin
+                    d:=tordconstnode(right).value.svalue;
+                    if d>=aword(1) shl (left.resultdef.size*8-1) then { divisor has its top bit set, so the quotient can only be 0 or 1 }
+                      begin
+                        if (cgsize in [OS_64,OS_S64]) then
+                          begin
+                            hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+                            emit_const_reg(A_MOV,opsize,aint(d),hreg2);
+                            emit_reg_reg(A_CMP,opsize,hreg2,hreg1);
+                          end
+                        else
+                          emit_const_reg(A_CMP,opsize,aint(d),hreg1);
+                        location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+                        emit_const_reg(A_MOV,opsize,0,location.register);
+                        emit_const_reg(A_SBB,opsize,-1,location.register); { 0-(-1)-carry: 1 when numerator>=d, else 0; relies on the carry left by the CMP above }
+                      end
+                    else
+                      begin
+                        calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);
+                        cg.getcpuregister(current_asmdata.CurrAsmList,rega);
+                        emit_const_reg(A_MOV,opsize,aint(m),rega);
+                        cg.getcpuregister(current_asmdata.CurrAsmList,regd);
+                        emit_reg(A_MUL,opsize,hreg1);
+                        cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
+                        if m_add then
+                          begin
+                            { addition can overflow, shift first bit considering carry,
+                              then shift remaining bits in regular way. }
+                            emit_reg_reg(A_ADD,opsize,hreg1,regd);
+                            emit_const_reg(A_RCR,opsize,1,regd);
+                            dec(s);
+                          end;
+                        if s<>0 then
+                          emit_const_reg(A_SHR,opsize,aint(s),regd);
+                        cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
+                        location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+                        cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)
+                      end;
+                  end;
+              end;
+          end
+        else
+          begin
+            {Bring numerator (dividend) into R/EAX.}
+            cg.getcpuregister(current_asmdata.CurrAsmList,rega);
+            emit_reg_reg(A_MOV,opsize,hreg1,rega);
+            cg.getcpuregister(current_asmdata.CurrAsmList,regd);
+            {Sign extension depends on the left type.}
+            if is_signed(left.resultdef) then
+              case left.resultdef.size of
+{$ifdef x86_64}
+                8:
+                  emit_none(A_CQO,S_NO);
+{$endif x86_64}
+                4:
+                  emit_none(A_CDQ,S_NO);
+                else
+                  internalerror(2013102701);
+              end
+            else
+              emit_reg_reg(A_XOR,opsize,regd,regd);
+
+            {Division depends on the right type.}
+            if is_signed(right.resultdef) then
+              op:=A_IDIV
+            else
+              op:=A_DIV;
+
+            if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
+              emit_ref(op,opsize,right.location.reference)
+            else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
+              emit_reg(op,opsize,right.location.register)
+            else
+              begin
+                hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size);
+                hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,right.resultdef,right.location,hreg1);
+                emit_reg(op,opsize,hreg1);
+              end;
+
+            { Copy the result into a new register. Release R/EAX & R/EDX.
+              The quotient is taken from R/EAX for div, the remainder from R/EDX for mod. }
+            cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
+            cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
+            location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+            if nodetype=divn then
+              cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,rega,location.register)
+            else
+              cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register);
+          end;
+      end;
+
 end.
 end.

+ 9 - 7
compiler/x86/nx86mem.pas

@@ -35,9 +35,7 @@ interface
       end;
       end;
 
 
       tx86vecnode = class(tcgvecnode)
       tx86vecnode = class(tcgvecnode)
-{$ifndef i8086}
         procedure update_reference_reg_mul(maybe_const_reg:tregister;l:aint);override;
         procedure update_reference_reg_mul(maybe_const_reg:tregister;l:aint);override;
-{$endif not i8086}
       end;
       end;
 
 
 implementation
 implementation
@@ -46,7 +44,7 @@ implementation
       cutils,verbose,
       cutils,verbose,
       aasmtai,aasmdata,
       aasmtai,aasmdata,
       cgutils,cgobj,
       cgutils,cgobj,
-      symconst,symdef;
+      symconst,symdef,symcpu;
 
 
 {*****************************************************************************
 {*****************************************************************************
                            TX86DEREFNODE
                            TX86DEREFNODE
@@ -55,7 +53,7 @@ implementation
      procedure tx86derefnode.pass_generate_code;
      procedure tx86derefnode.pass_generate_code;
        begin
        begin
          inherited pass_generate_code;
          inherited pass_generate_code;
-         case tpointerdef(left.resultdef).x86pointertyp of
+         case tcpupointerdef(left.resultdef).x86pointertyp of
            x86pt_near: ;
            x86pt_near: ;
            x86pt_near_cs: location.reference.segment:=NR_CS;
            x86pt_near_cs: location.reference.segment:=NR_CS;
            x86pt_near_ds: location.reference.segment:=NR_DS;
            x86pt_near_ds: location.reference.segment:=NR_DS;
@@ -80,7 +78,6 @@ implementation
                              TX86VECNODE
                              TX86VECNODE
 *****************************************************************************}
 *****************************************************************************}
 
 
-{$ifndef i8086}
      { this routine must, like any other routine, not change the contents }
      { this routine must, like any other routine, not change the contents }
      { of base/index registers of references, as these may be regvars.    }
      { of base/index registers of references, as these may be regvars.    }
      { The register allocator can coalesce one LOC_REGISTER being moved   }
      { The register allocator can coalesce one LOC_REGISTER being moved   }
@@ -93,6 +90,7 @@ implementation
        var
        var
          l2 : integer;
          l2 : integer;
          hreg : tregister;
          hreg : tregister;
+         saveseg: TRegister;
        begin
        begin
          { Optimized for x86 to use the index register and scalefactor }
          { Optimized for x86 to use the index register and scalefactor }
          if location.reference.index=NR_NO then
          if location.reference.index=NR_NO then
@@ -102,7 +100,9 @@ implementation
          else if location.reference.base=NR_NO then
          else if location.reference.base=NR_NO then
           begin
           begin
             if (location.reference.scalefactor > 1) then
             if (location.reference.scalefactor > 1) then
-              hreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
+              hreg:=cg.getaddressregister(current_asmdata.CurrAsmList)
+            else
+              hreg:=NR_NO;
             case location.reference.scalefactor of
             case location.reference.scalefactor of
              0,1 : hreg:=location.reference.index;
              0,1 : hreg:=location.reference.index;
              2 : cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,1,location.reference.index,hreg);
              2 : cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,1,location.reference.index,hreg);
@@ -117,7 +117,10 @@ implementation
           begin
           begin
             hreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
             hreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
             cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,location.reference,hreg);
             cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,location.reference,hreg);
+            { reference_reset_base kills the segment, so make sure we preserve it }
+            saveseg:=location.reference.segment;
             reference_reset_base(location.reference,hreg,0,location.reference.alignment);
             reference_reset_base(location.reference,hreg,0,location.reference.alignment);
+            location.reference.segment:=saveseg;
           end;
           end;
          { insert the new index register and scalefactor or
          { insert the new index register and scalefactor or
            do the multiplication manual }
            do the multiplication manual }
@@ -138,7 +141,6 @@ implementation
          end;
          end;
          location.reference.index:=hreg;
          location.reference.index:=hreg;
        end;
        end;
-{$endif not i8086}
 
 
 begin
 begin
    cderefnode:=tx86derefnode;
    cderefnode:=tx86derefnode;

+ 25 - 21
compiler/x86/nx86set.pas

@@ -72,8 +72,9 @@ implementation
         indexreg : tregister;
         indexreg : tregister;
         href : treference;
         href : treference;
         jtlist: tasmlist;
         jtlist: tasmlist;
-        sectype: TAsmSectiontype;
         opcgsize: tcgsize;
         opcgsize: tcgsize;
+        jumpreg: tregister;
+        labeltyp: taiconst_type;
 
 
         procedure genitem(list:TAsmList;t : pcaselabel);
         procedure genitem(list:TAsmList;t : pcaselabel);
           var
           var
@@ -85,21 +86,13 @@ implementation
             i:=last.svalue+1;
             i:=last.svalue+1;
             while i<=t^._low.svalue-1 do
             while i<=t^._low.svalue-1 do
               begin
               begin
-{$ifdef i8086}
-                list.concat(Tai_const.Create_sym_near(elselabel));
-{$else i8086}
-                list.concat(Tai_const.Create_sym(elselabel));
-{$endif i8086}
+                list.concat(Tai_const.Create_type_sym(labeltyp,elselabel));
                 inc(i);
                 inc(i);
               end;
               end;
             i:=t^._low.svalue;
             i:=t^._low.svalue;
             while i<=t^._high.svalue do
             while i<=t^._high.svalue do
               begin
               begin
-{$ifdef i8086}
-                list.concat(Tai_const.Create_sym_near(blocklabel(t^.blockid)));
-{$else i8086}
-                list.concat(Tai_const.Create_sym(blocklabel(t^.blockid)));
-{$endif i8086}
+                list.concat(Tai_const.Create_type_sym(labeltyp,blocklabel(t^.blockid)));
                 inc(i);
                 inc(i);
               end;
               end;
             last:=t^._high;
             last:=t^._high;
@@ -109,6 +102,8 @@ implementation
 
 
       begin
       begin
         last:=min_;
         last:=min_;
+        { This generates near pointers on i8086 }
+        labeltyp:=aitconst_ptr;
         opcgsize:=def_cgsize(opsize);
         opcgsize:=def_cgsize(opsize);
         if not(jumptable_no_range) then
         if not(jumptable_no_range) then
           begin
           begin
@@ -131,19 +126,24 @@ implementation
 {$else i8086}
 {$else i8086}
         href.scalefactor:=sizeof(aint);
         href.scalefactor:=sizeof(aint);
 {$endif i8086}
 {$endif i8086}
-        emit_ref(A_JMP,S_NO,href);
-        { generate jump table }
-        if (target_info.system in [system_i386_darwin,system_i386_iphonesim]) then
+
+        if (not (target_info.system in [system_i386_darwin,system_i386_iphonesim])) and
+           (cs_create_pic in current_settings.moduleswitches) then
           begin
           begin
-            jtlist:=current_asmdata.asmlists[al_const];
-            sectype:=sec_rodata;
+            labeltyp:=aitconst_gotoff_symbol;
+            jumpreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
+            cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,href,jumpreg);
+            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_ADDR,current_procinfo.got,jumpreg);
+            emit_reg(A_JMP,S_NO,jumpreg);
           end
           end
         else
         else
-          begin
-            jtlist:=current_procinfo.aktlocaldata;
-            sectype:=sec_data;
-          end;
-        new_section(jtlist,sectype,current_procinfo.procdef.mangledname,sizeof(aint));
+          emit_ref(A_JMP,S_NO,href);
+        { generate jump table }
+        if (target_info.system in [system_i386_darwin,system_i386_iphonesim]) then
+          jtlist:=current_asmdata.asmlists[al_const]
+        else
+          jtlist:=current_procinfo.aktlocaldata;
+        new_section(jtlist,sec_rodata,current_procinfo.procdef.mangledname,sizeof(aint));
         jtlist.concat(Tai_label.Create(table));
         jtlist.concat(Tai_label.Create(table));
         genitem(jtlist,hp);
         genitem(jtlist,hp);
       end;
       end;
@@ -375,6 +375,10 @@ implementation
 {$endif i8086}
 {$endif i8086}
 
 
        begin
        begin
+         ranges:=false;
+         numparts:=0;
+         fillchar(setparts,sizeof(setparts),0);
+
          { We check first if we can generate jumps, this can be done
          { We check first if we can generate jumps, this can be done
            because the resultdef is already set in firstpass }
            because the resultdef is already set in firstpass }
 
 

+ 1 - 0
compiler/x86/rax86.pas

@@ -997,6 +997,7 @@ var
   i,asize : longint;
   i,asize : longint;
   ai   : taicpu;
   ai   : taicpu;
 begin
 begin
+  ConcatInstruction:=nil;
   if (OpOrder=op_intel) then
   if (OpOrder=op_intel) then
     SwapOperands;
     SwapOperands;
 
 

+ 41 - 10
compiler/x86/rax86int.pas

@@ -349,8 +349,8 @@ Unit Rax86int;
                     c:=current_scanner.asmgetchar;
                     c:=current_scanner.asmgetchar;
                   end;
                   end;
                  uppervar(actasmpattern);
                  uppervar(actasmpattern);
-                 { after prefix we allow also a new opcode }
-                 If is_prefix(actopcode) and is_asmopcode(actasmpattern) then
+                 { after prefix (or segment override) we allow also a new opcode }
+                 If (is_prefix(actopcode) or is_override(actopcode)) and is_asmopcode(actasmpattern) then
                   Begin
                   Begin
                     { if we are not in a constant }
                     { if we are not in a constant }
                     { expression than this is an  }
                     { expression than this is an  }
@@ -1978,6 +1978,7 @@ Unit Rax86int;
       var
       var
         PrefixOp,OverrideOp: tasmop;
         PrefixOp,OverrideOp: tasmop;
         operandnum : longint;
         operandnum : longint;
+        t: TRegister;
         is_far_const:boolean;
         is_far_const:boolean;
         i:byte;
         i:byte;
       begin
       begin
@@ -2044,10 +2045,13 @@ Unit Rax86int;
           instr.opcode:=A_POPFW
           instr.opcode:=A_POPFW
         else if (instr.opcode=A_PUSHF) then
         else if (instr.opcode=A_PUSHF) then
           instr.opcode:=A_PUSHFW
           instr.opcode:=A_PUSHFW
+{$ifndef x86_64}
         else if (instr.opcode=A_PUSHA) then
         else if (instr.opcode=A_PUSHA) then
           instr.opcode:=A_PUSHAW
           instr.opcode:=A_PUSHAW
         else if (instr.opcode=A_POPA) then
         else if (instr.opcode=A_POPA) then
-          instr.opcode:=A_POPAW;
+          instr.opcode:=A_POPAW
+{$endif x86_64}
+        ;
         { We are reading operands, so opcode will be an AS_ID }
         { We are reading operands, so opcode will be an AS_ID }
         operandnum:=1;
         operandnum:=1;
         is_far_const:=false;
         is_far_const:=false;
@@ -2135,10 +2139,40 @@ Unit Rax86int;
             if (instr.opcode=A_CALL) and (typ=OPR_SYMBOL) and (symbol<>nil) and (symbol.typ<>AT_DATA) then
             if (instr.opcode=A_CALL) and (typ=OPR_SYMBOL) and (symbol<>nil) and (symbol.typ<>AT_DATA) then
               if current_settings.x86memorymodel in x86_far_code_models then
               if current_settings.x86memorymodel in x86_far_code_models then
                 begin
                 begin
-                  instr.operands[i].InitRef;
-                  ref.refaddr:=addr_far;
+                  instr.opsize:=S_FAR;
                 end;
                 end;
 {$endif i8086}
 {$endif i8086}
+        if (MemRefInfo(instr.opcode).ExistsSSEAVX) and
+           (MemRefInfo(instr.opcode).MemRefSize in MemRefSizeInfoVMems) then
+        begin
+          for i:=1 to operandnum do
+          begin
+            if (instr.operands[i].opr.typ = OPR_REFERENCE) and
+               (getregtype(instr.operands[i].opr.ref.base) = R_MMREGISTER) and
+               (instr.operands[i].opr.ref.index = NR_NO) then
+            begin
+              instr.operands[i].opr.ref.index := instr.operands[i].opr.ref.base;
+              instr.operands[i].opr.ref.base  := NR_NO;
+            end
+            else if (instr.operands[i].opr.typ = OPR_REFERENCE) and
+                    (getregtype(instr.operands[i].opr.ref.base) = R_MMREGISTER) and
+                    (getregtype(instr.operands[i].opr.ref.index) = R_INTREGISTER) and
+                    (getsubreg(instr.operands[i].opr.ref.index) = R_SUBADDR) then
+            begin
+              // exchange base- and index-register
+              // e.g. VGATHERDPD  XMM0, [XMM1 + RAX], XMM2 =>> VGATHERDPD  XMM0, [RAX + XMM1], XMM2
+              // e.g. VGATHERDPD  XMM0, [XMM1 + RAX * 2], XMM2 =>> not supported
+              // e.g. VGATHERDPD  XMM0, [XMM1 + RAX + 16], XMM2 =>> VGATHERDPD  XMM0, [RAX + XMM1 + 16]
+              if instr.operands[i].opr.ref.scalefactor > 1 then Message(asmr_e_invalid_reference_syntax)
+              else
+              begin
+                t := instr.operands[i].opr.ref.base;
+                instr.operands[i].opr.ref.base := instr.operands[i].opr.ref.index;
+                instr.operands[i].opr.ref.index := t;
+              end;
+            end;
+          end;
+        end;
       end;
       end;
 
 
 
 
@@ -2228,8 +2262,6 @@ Unit Rax86int;
        end;
        end;
       }
       }
       curlist:=TAsmList.Create;
       curlist:=TAsmList.Create;
-      { setup label linked list }
-      LocalLabelList:=TLocalLabelList.Create;
       { we might need to know which parameters are passed in registers }
       { we might need to know which parameters are passed in registers }
       if not parse_generic then
       if not parse_generic then
         current_procinfo.generate_parameter_info;
         current_procinfo.generate_parameter_info;
@@ -2330,9 +2362,8 @@ Unit Rax86int;
             end;
             end;
         end; { end case }
         end; { end case }
       until false;
       until false;
-      { Check LocalLabelList }
-      LocalLabelList.CheckEmitted;
-      LocalLabelList.Free;
+      { check that all referenced local labels are defined }
+      checklocallabels;
       { Return the list in an asmnode }
       { Return the list in an asmnode }
       assemble:=curlist;
       assemble:=curlist;
       Message1(asmr_d_finish_reading,'intel');
       Message1(asmr_d_finish_reading,'intel');

+ 76 - 27
compiler/x86/rgx86.pas

@@ -136,11 +136,18 @@ implementation
                 begin
                 begin
                   { avx instruction?
                   { avx instruction?
                     currently this rule is sufficient but it might be extended }
                     currently this rule is sufficient but it might be extended }
-                  if (ops=3) and (opcode<>A_SHRD) and (opcode<>A_SHLD) then
+                  if (ops=3) and (opcode<>A_SHRD) and (opcode<>A_SHLD) and (opcode<>A_IMUL) then
                     begin
                     begin
-                      { avx instructions allow only the first operand (at&t counting) to be a register operand }
-                      { all operands must be registers ... }
-                      if (oper[0]^.typ=top_reg) and
+                      { BMI shifting/rotating instructions have special requirements regarding spilling, only
+                        the middle operand can be replaced }
+                      if ((opcode=A_RORX) or (opcode=A_SHRX) or (opcode=A_SARX) or (opcode=A_SHLX)) then
+                        begin
+                          if (oper[1]^.typ=top_reg) and (getregtype(oper[1]^.reg)=regtype) and (get_alias(getsupreg(oper[1]^.reg))=orgreg) then
+                            replaceoper:=1;
+                        end
+                      { avx instructions allow only the first operand (at&t counting) to be a register operand
+                        all operands must be registers ... }
+                      else if (oper[0]^.typ=top_reg) and
                          (oper[1]^.typ=top_reg) and
                          (oper[1]^.typ=top_reg) and
                          (oper[2]^.typ=top_reg) and
                          (oper[2]^.typ=top_reg) and
                          { but they must be different }
                          { but they must be different }
@@ -155,7 +162,7 @@ implementation
                     end
                     end
                   else
                   else
                     begin
                     begin
-                      { We can handle opcodes with 2 and shrd/shld the same way, where the 3rd operand is const or CL,
+                      { We can handle opcodes with 2 and 3-op imul/shrd/shld the same way, where the 3rd operand is const or CL,
                         that doesn't need spilling.
                         that doesn't need spilling.
                         However, due to AT&T order inside the compiler, the 3rd operand is
                         However, due to AT&T order inside the compiler, the 3rd operand is
                         numbered 0, so look at operand no. 1 and 2 if we have 3 operands by
                         numbered 0, so look at operand no. 1 and 2 if we have 3 operands by
@@ -163,7 +170,24 @@ implementation
                       n:=0;
                       n:=0;
                       if ops=3 then
                       if ops=3 then
                         n:=1;
                         n:=1;
-                      if (oper[n+0]^.typ=top_reg) and
+                      { lea is tricky: part of operand 0 can be spilled and the instruction can be converted into an
+                        add, if base or index is to be spilled and the other one is equal to the destination }
+                      if (opcode=A_LEA) then
+                        begin
+                          if (oper[0]^.ref^.offset=0) and
+                             (oper[0]^.ref^.scalefactor in [0,1]) and
+                             (((getregtype(oper[0]^.ref^.base)=regtype) and
+                               (get_alias(getsupreg(oper[0]^.ref^.base))=orgreg) and
+                               (getregtype(oper[0]^.ref^.index)=getregtype(oper[1]^.reg)) and
+                               (get_alias(getsupreg(oper[0]^.ref^.index))=get_alias(getsupreg(oper[1]^.reg)))) or
+                              ((getregtype(oper[0]^.ref^.index)=regtype) and
+                               (get_alias(getsupreg(oper[0]^.ref^.index))=orgreg) and
+                               (getregtype(oper[0]^.ref^.base)=getregtype(oper[1]^.reg)) and
+                               (get_alias(getsupreg(oper[0]^.ref^.base))=get_alias(getsupreg(oper[1]^.reg))))
+                             ) then
+                             replaceoper:=0;
+                        end
+                      else if (oper[n+0]^.typ=top_reg) and
                          (oper[n+1]^.typ=top_reg) and
                          (oper[n+1]^.typ=top_reg) and
                          ((getregtype(oper[n+0]^.reg)<>regtype) or
                          ((getregtype(oper[n+0]^.reg)<>regtype) or
                           (getregtype(oper[n+1]^.reg)<>regtype) or
                           (getregtype(oper[n+1]^.reg)<>regtype) or
@@ -220,7 +244,9 @@ implementation
                               A_CMOVcc,
                               A_CMOVcc,
                               A_MOVZX,
                               A_MOVZX,
                               A_MOVSX,
                               A_MOVSX,
+{$ifdef x86_64}
                               A_MOVSXD,
                               A_MOVSXD,
+{$endif x86_64}
                               A_MULSS,
                               A_MULSS,
                               A_MULSD,
                               A_MULSD,
                               A_SUBSS,
                               A_SUBSS,
@@ -229,6 +255,8 @@ implementation
                               A_ADDSS,
                               A_ADDSS,
                               A_DIVSD,
                               A_DIVSD,
                               A_DIVSS,
                               A_DIVSS,
+                              A_SQRTSD,
+                              A_SQRTSS,
                               A_SHLD,
                               A_SHLD,
                               A_SHRD,
                               A_SHRD,
                               A_COMISD,
                               A_COMISD,
@@ -255,7 +283,6 @@ implementation
                               A_CVTSS2SI,
                               A_CVTSS2SI,
                               A_CVTTPS2PI,
                               A_CVTTPS2PI,
                               A_CVTTSS2SI,
                               A_CVTTSS2SI,
-                              A_IMUL,
                               A_XORPD,
                               A_XORPD,
                               A_XORPS,
                               A_XORPS,
                               A_ORPD,
                               A_ORPD,
@@ -265,9 +292,14 @@ implementation
                               A_UNPCKLPS,
                               A_UNPCKLPS,
                               A_UNPCKHPS,
                               A_UNPCKHPS,
                               A_SHUFPD,
                               A_SHUFPD,
-                              A_SHUFPS:
-
+                              A_SHUFPS,
+                              A_VCOMISD,
+                              A_VCOMISS:
                                 replaceoper:=-1;
                                 replaceoper:=-1;
+
+                              A_IMUL:
+                                if ops<>3 then
+                                  replaceoper:=-1;
 {$ifdef x86_64}
 {$ifdef x86_64}
                               A_MOV:
                               A_MOV:
                                  { 64 bit constants can only be moved into registers }
                                  { 64 bit constants can only be moved into registers }
@@ -279,7 +311,16 @@ implementation
 {$endif x86_64}
 {$endif x86_64}
                             end;
                             end;
                           end;
                           end;
-                        end;
+                        2 :
+                          begin
+                            { Some 3-op instructions don't allow memory references
+                              for destination }
+                            case instr.opcode of
+                              A_IMUL:
+                                replaceoper:=-1;
+                            end;
+                          end;
+                      end;
                     end;
                     end;
                 end;
                 end;
              end;
              end;
@@ -296,23 +337,31 @@ implementation
             { Replace register with spill reference }
             { Replace register with spill reference }
             if replaceoper<>-1 then
             if replaceoper<>-1 then
               begin
               begin
-                is_subh:=getsubreg(oper[replaceoper]^.reg)=R_SUBH;
-                oper[replaceoper]^.typ:=top_ref;
-                new(oper[replaceoper]^.ref);
-                oper[replaceoper]^.ref^:=spilltemp;
-                if is_subh then
-                  inc(oper[replaceoper]^.ref^.offset);
-                { memory locations aren't guaranteed to be aligned }
-                case opcode of
-                  A_MOVAPS:
-                    opcode:=A_MOVSS;
-                  A_MOVAPD:
-                    opcode:=A_MOVSD;
-                  A_VMOVAPS:
-                    opcode:=A_VMOVSS;
-                  A_VMOVAPD:
-                    opcode:=A_VMOVSD;
-                end;
+                if opcode=A_LEA then
+                  begin
+                    opcode:=A_ADD;
+                    oper[0]^.ref^:=spilltemp;
+                  end
+                else
+                  begin
+                    is_subh:=getsubreg(oper[replaceoper]^.reg)=R_SUBH;
+                    oper[replaceoper]^.typ:=top_ref;
+                    new(oper[replaceoper]^.ref);
+                    oper[replaceoper]^.ref^:=spilltemp;
+                    if is_subh then
+                      inc(oper[replaceoper]^.ref^.offset);
+                    { memory locations aren't guaranteed to be aligned }
+                    case opcode of
+                      A_MOVAPS:
+                        opcode:=A_MOVSS;
+                      A_MOVAPD:
+                        opcode:=A_MOVSD;
+                      A_VMOVAPS:
+                        opcode:=A_VMOVSS;
+                      A_VMOVAPD:
+                        opcode:=A_VMOVSD;
+                    end;
+                  end;
                 result:=true;
                 result:=true;
               end;
               end;
           end;
           end;

+ 94 - 0
compiler/x86/symi86.pas

@@ -0,0 +1,94 @@
+{
+    Copyright (c) 2014 by Florian Klaempfl
+
+    Symbol table overrides for i386 and i8086
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit symi86;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  symtype,symdef,symsym;
+
+type
+
+  { ti86procvardef: procedural-variable definition for i386/i8086; derives from tprocvardef and overrides is_pushleftright }
+
+  ti86procvardef = class(tprocvardef)
+    function is_pushleftright: boolean; override; { true when proccalloption is in pushleftright_pocalls }
+  end;
+
+  { ti86procdef: procedure definition for i386/i8086; derives from tprocdef and overrides is_pushleftright }
+
+  ti86procdef = class(tprocdef)
+    function is_pushleftright: boolean; override; { true when proccalloption is in pushleftright_pocalls }
+  end;
+
+  { ti86absolutevarsym: absolute-variable symbol for i386/i8086 that adds the absseg flag and stores it in the PPU stream }
+
+  ti86absolutevarsym = class(tabsolutevarsym)
+   protected
+    procedure ppuload_platform(ppufile: tcompilerppufile); override; { reads absseg (only when abstyp=toaddr) }
+    procedure ppuwrite_platform(ppufile: tcompilerppufile); override; { writes absseg (only when abstyp=toaddr) }
+   public
+    absseg  : boolean; { NOTE(review): presumably marks an absolute address that includes a segment part - confirm against the users of this field }
+  end;
+
+implementation
+
+uses
+  symconst;
+
+{ ti86procvardef: method implementations }
+
+function ti86procvardef.is_pushleftright: boolean; { override of the tprocvardef method }
+  begin
+    result:=proccalloption in pushleftright_pocalls; { plain set-membership test on this definition's calling convention }
+  end;
+
+{ ti86procdef: method implementations }
+
+function ti86procdef.is_pushleftright: boolean; { override of the tprocdef method; same test as ti86procvardef.is_pushleftright }
+  begin
+    result:=proccalloption in pushleftright_pocalls; { plain set-membership test on this definition's calling convention }
+  end;
+
+{ ti86absolutevarsym: method implementations }
+
+procedure ti86absolutevarsym.ppuload_platform(ppufile: tcompilerppufile); { restores the platform-specific absseg field from ppufile }
+  begin
+    inherited; { run the inherited loader first }
+    if abstyp=toaddr then { the extra byte is only present for toaddr symbols; ppuwrite_platform uses the same condition }
+      absseg:=boolean(ppufile.getbyte) { one byte, typecast back to boolean }
+    else
+      absseg:=false; { nothing stored in the stream: give the field a defined value }
+  end;
+
+
+procedure ti86absolutevarsym.ppuwrite_platform(ppufile: tcompilerppufile); { stores the platform-specific absseg field into ppufile }
+  begin
+    inherited; { run the inherited writer first }
+    if abstyp=toaddr then { must stay in sync with the condition in ppuload_platform, otherwise the stream is misread }
+      ppufile.putbyte(byte(absseg)); { boolean stored as a single byte }
+  end;
+
+end.
+

+ 140 - 0
compiler/x86/symx86.pas

@@ -0,0 +1,140 @@
+{
+    Copyright (c) 2014 by Florian Klaempfl
+
+    Symbol table overrides for x86
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit symx86;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  globtype,
+  symconst, symtype,symdef,symsym;
+
+type
+  tx86pointerdef = class(tpointerdef) { pointer definition extended with an x86-specific pointer kind (x86pointertyp) }
+   protected
+    procedure ppuload_platform(ppufile: tcompilerppufile); override; { reads x86pointertyp as one byte from the PPU stream }
+    procedure ppuwrite_platform(ppufile: tcompilerppufile); override; { writes x86pointertyp as one byte to the PPU stream }
+   public
+    x86pointertyp : tx86pointertyp; { pointer kind; GetTypeName lists the values handled here (near, near with segment, far, huge) }
+    constructor create(def: tdef); override; { pointer kind is taken from default_x86_data_pointer_type }
+    constructor createx86(def:tdef;x86typ:tx86pointertyp);virtual; { pointer kind is chosen by the caller }
+    function size: asizeint; override; { sizeof(pint)+2 for far and huge pointers, inherited size otherwise }
+    function getcopy: tstoreddef; override; { inherited copy plus x86pointertyp }
+    function GetTypeName: string; override; { inherited name plus a ';<kind>' suffix when the kind is not the default }
+    class function default_x86_data_pointer_type: tx86pointertyp; virtual; { returns x86pt_near in this class; declared virtual }
+  end;
+  tx86pointerdefclass = class of tx86pointerdef;
+
+implementation
+
+  uses
+    globals, verbose;
+
+{****************************************************************************
+                             tx86pointerdef
+****************************************************************************}
+
+  procedure tx86pointerdef.ppuload_platform(ppufile: tcompilerppufile); { restores x86pointertyp from ppufile }
+    begin
+      inherited; { run the inherited loader first }
+      x86pointertyp:=tx86pointertyp(ppufile.getbyte); { one byte, typecast back to the enumeration; counterpart of ppuwrite_platform }
+    end;
+
+
+  procedure tx86pointerdef.ppuwrite_platform(ppufile: tcompilerppufile); { stores x86pointertyp into ppufile }
+    begin
+      inherited; { run the inherited writer first }
+      ppufile.putbyte(byte(x86pointertyp)); { enumeration stored as a single byte; counterpart of ppuload_platform }
+    end;
+
+
+  constructor tx86pointerdef.create(def: tdef); { creates a pointer to def using the default x86 pointer kind }
+    begin
+      inherited create(def); { inherited construction first }
+      x86pointertyp := default_x86_data_pointer_type; { class function declared virtual, so the default is not hard-coded here }
+    end;
+
+
+  constructor tx86pointerdef.createx86(def: tdef; x86typ: tx86pointertyp); { creates a pointer to def with an explicitly given x86 pointer kind }
+    begin
+      tabstractpointerdef(self).create(pointerdef,def); { calls create through a tabstractpointerdef typecast instead of 'inherited'; NOTE(review): presumably to skip tpointerdef.create - confirm }
+      x86pointertyp := x86typ; { kind requested by the caller }
+      has_pointer_math:=cs_pointermath in current_settings.localswitches; { set explicitly here from the current local switches }
+    end;
+
+
+  function tx86pointerdef.size: asizeint; { size of this pointer type, depending on its x86 pointer kind }
+    begin
+      if x86pointertyp in [x86pt_far,x86pt_huge] then
+        result:=sizeof(pint)+2 { far and huge: two bytes more than sizeof(pint); NOTE(review): presumably the 16-bit segment part - confirm }
+      else
+        result:=inherited; { all other kinds use the inherited size }
+    end;
+
+
+  function tx86pointerdef.getcopy: tstoreddef; { returns the inherited copy with x86pointertyp carried over }
+    begin
+      result:=inherited; { the copy itself is produced by the inherited getcopy }
+      tx86pointerdef(result).x86pointertyp:=x86pointertyp; { typecast assumes the inherited copy is a tx86pointerdef instance - TODO confirm }
+    end;
+
+
+  function tx86pointerdef.GetTypeName: string; { inherited type name, plus a ';<kind>' suffix for non-default pointer kinds }
+    begin
+      result:=inherited; { start from the inherited name }
+      if x86pointertyp<>default_x86_data_pointer_type then { the default kind gets no suffix }
+        begin
+          case x86pointertyp of
+            x86pt_near:
+              result:=result+';near';
+            x86pt_near_cs: { doubled quotes are Pascal escapes: the suffix reads ;near 'CS' }
+              result:=result+';near ''CS''';
+            x86pt_near_ds:
+              result:=result+';near ''DS''';
+            x86pt_near_ss:
+              result:=result+';near ''SS''';
+            x86pt_near_es:
+              result:=result+';near ''ES''';
+            x86pt_near_fs:
+              result:=result+';near ''FS''';
+            x86pt_near_gs:
+              result:=result+';near ''GS''';
+            x86pt_far:
+              result:=result+';far';
+            x86pt_huge:
+              result:=result+';huge';
+            else
+              internalerror(2013050301); { any kind not listed above is treated as a compiler bug }
+          end;
+        end;
+    end;
+
+
+  class function tx86pointerdef.default_x86_data_pointer_type: tx86pointertyp; { pointer kind used by create and treated as "no suffix" by GetTypeName }
+    begin
+      result:=x86pt_near; { value for this class; the method is declared virtual in the class declaration }
+    end;
+
+
+end.
+

+ 485 - 82
compiler/x86/x86ins.dat

@@ -924,8 +924,7 @@ reg16|32,mem          \320\1\xC5\110                  8086,NOX86_64
 
 
 [LEA,leaX]
 [LEA,leaX]
 (Ch_Wop2, Ch_Rop1, Ch_None)
 (Ch_Wop2, Ch_Rop1, Ch_None)
-reg32|64,mem          \320\1\x8D\110                  8086
-reg32|64,imm          \320\1\x8D\110                  8086,SD
+reg16|32|64,mem       \301\320\1\x8D\110              8086
 
 
 [LEAVE]
 [LEAVE]
 (Ch_RWESP, Ch_WEBP, Ch_None)
 (Ch_RWESP, Ch_WEBP, Ch_None)
@@ -1239,12 +1238,12 @@ mmxreg,mmxrm          \2\x0F\xDD\110                  PENT,MMX,SM
 xmmreg,xmmrm          \361\2\x0F\xDD\110              WILLAMETTE,SSE2,SM
 xmmreg,xmmrm          \361\2\x0F\xDD\110              WILLAMETTE,SSE2,SM
 
 
 [PADDW]
 [PADDW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 mmxreg,mmxrm          \2\x0F\xFD\110                  PENT,MMX,SM
 mmxreg,mmxrm          \2\x0F\xFD\110                  PENT,MMX,SM
 xmmreg,xmmrm          \361\2\x0F\xFD\110              WILLAMETTE,SSE2,SM
 xmmreg,xmmrm          \361\2\x0F\xFD\110              WILLAMETTE,SSE2,SM
 
 
 [PAND]
 [PAND]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 mmxreg,mmxrm          \2\x0F\xDB\110                  PENT,MMX,SM
 mmxreg,mmxrm          \2\x0F\xDB\110                  PENT,MMX,SM
 xmmreg,xmmrm          \361\2\x0F\xDB\110              WILLAMETTE,SSE2,SM
 xmmreg,xmmrm          \361\2\x0F\xDB\110              WILLAMETTE,SSE2,SM
 
 
@@ -2093,7 +2092,7 @@ void                  \333\3\x0F\xA7\xE0              P6,CYRIX
 void                  \333\3\x0F\xA7\xE8              P6,CYRIX
 void                  \333\3\x0F\xA7\xE8              P6,CYRIX
 
 
 [CMOVcc,cmovCCX]
 [CMOVcc,cmovCCX]
-(Ch_ROp1, Ch_WOp2, Ch_RFLAGS)
+(Ch_ROp1, Ch_RWOp2, Ch_RFLAGS)
 reg16|32|64,regmem    \320\1\x0F\13\x40\110          P6,SM
 reg16|32|64,regmem    \320\1\x0F\13\x40\110          P6,SM
 
 
 [Jcc]
 [Jcc]
@@ -2558,12 +2557,12 @@ void                    \3\x0F\xAE\xF0                WILLAMETTE,SSE2
 ; Willamette MMX instructions (SSE2 SIMD Integer Instructions)
 ; Willamette MMX instructions (SSE2 SIMD Integer Instructions)
 ;
 ;
 [MOVDQA]
 [MOVDQA]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Wop2, Ch_None)
 xmmrm,xmmreg            \361\2\x0F\x7F\101              WILLAMETTE,SSE2,SM
 xmmrm,xmmreg            \361\2\x0F\x7F\101              WILLAMETTE,SSE2,SM
 xmmreg,xmmrm            \361\2\x0F\x6F\110              WILLAMETTE,SSE2,SM
 xmmreg,xmmrm            \361\2\x0F\x6F\110              WILLAMETTE,SSE2,SM
 
 
 [MOVDQU]
 [MOVDQU]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Wop2, Ch_None)
 xmmrm,xmmreg            \333\2\x0F\x7F\101              WILLAMETTE,SSE2,SM
 xmmrm,xmmreg            \333\2\x0F\x7F\101              WILLAMETTE,SSE2,SM
 xmmreg,xmmrm            \333\2\x0F\x6F\110              WILLAMETTE,SSE2,SM
 xmmreg,xmmrm            \333\2\x0F\x6F\110              WILLAMETTE,SSE2,SM
 
 
@@ -2910,7 +2909,7 @@ xmmreg,xmmrm            \334\2\x0F\x7D\110        PRESCOTT,SSE3,SM
 xmmreg,mem              \334\2\x0F\xF0\110        PRESCOTT,SSE3
 xmmreg,mem              \334\2\x0F\xF0\110        PRESCOTT,SSE3
 
 
 [MOVDDUP]
 [MOVDDUP]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Wop2, Ch_None)
 xmmreg,xmmrm            \334\2\x0F\x12\110        PRESCOTT,SSE3
 xmmreg,xmmrm            \334\2\x0F\x12\110        PRESCOTT,SSE3
 
 
 [MOVSHDUP]
 [MOVSHDUP]
@@ -3684,22 +3683,22 @@ reg64,mem32                          \333\362\363\370\1\x2C\110           AVX,SA
 reg64,xmmreg                         \333\362\363\370\1\x2C\110           AVX,SANDYBRIDGE
 reg64,xmmreg                         \333\362\363\370\1\x2C\110           AVX,SANDYBRIDGE
 
 
 [VDIVPD]
 [VDIVPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\x5E\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\x5E\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\x5E\75\120        AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\x5E\75\120        AVX,SANDYBRIDGE
 
 
 [VDIVPS]
 [VDIVPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,xmmrm                  \362\370\1\x5E\75\120                AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \362\370\1\x5E\75\120                AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \362\364\370\1\x5E\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \362\364\370\1\x5E\75\120            AVX,SANDYBRIDGE
 
 
 [VDIVSD]
 [VDIVSD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,mem64                  \334\362\370\1\x5E\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,mem64                  \334\362\370\1\x5E\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \334\362\370\1\x5E\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \334\362\370\1\x5E\75\120            AVX,SANDYBRIDGE
 
 
 [VDIVSS]
 [VDIVSS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,mem32                  \333\362\370\1\x5E\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,mem32                  \333\362\370\1\x5E\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x5E\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x5E\75\120            AVX,SANDYBRIDGE
 
 
@@ -3817,39 +3816,39 @@ xmmreg,xmmreg,mem32                  \333\362\370\1\x5D\75\120            AVX,SA
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x5D\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x5D\75\120            AVX,SANDYBRIDGE
 
 
 [VMOVAPD]
 [VMOVAPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 xmmreg,xmmrm                         \361\362\370\1\x28\110               AVX,SANDYBRIDGE
 xmmreg,xmmrm                         \361\362\370\1\x28\110               AVX,SANDYBRIDGE
 xmmrm,xmmreg                         \361\362\370\1\x29\101               AVX,SANDYBRIDGE
 xmmrm,xmmreg                         \361\362\370\1\x29\101               AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \361\362\364\370\1\x28\110           AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \361\362\364\370\1\x28\110           AVX,SANDYBRIDGE
 ymmrm,ymmreg                         \361\362\364\370\1\x29\101           AVX,SANDYBRIDGE
 ymmrm,ymmreg                         \361\362\364\370\1\x29\101           AVX,SANDYBRIDGE
 
 
 [VMOVAPS]
 [VMOVAPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 xmmreg,xmmrm                         \362\370\1\x28\110                   AVX,SANDYBRIDGE
 xmmreg,xmmrm                         \362\370\1\x28\110                   AVX,SANDYBRIDGE
 xmmrm,xmmreg                         \362\370\1\x29\101                   AVX,SANDYBRIDGE
 xmmrm,xmmreg                         \362\370\1\x29\101                   AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \362\364\370\1\x28\110               AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \362\364\370\1\x28\110               AVX,SANDYBRIDGE
 ymmrm,ymmreg                         \362\364\370\1\x29\101               AVX,SANDYBRIDGE
 ymmrm,ymmreg                         \362\364\370\1\x29\101               AVX,SANDYBRIDGE
 
 
 [VMOVD]
 [VMOVD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 xmmreg,rm32                          \361\362\370\1\x6E\110               AVX,SANDYBRIDGE
 xmmreg,rm32                          \361\362\370\1\x6E\110               AVX,SANDYBRIDGE
 rm32,xmmreg                          \361\362\370\1\x7E\101               AVX,SANDYBRIDGE
 rm32,xmmreg                          \361\362\370\1\x7E\101               AVX,SANDYBRIDGE
 
 
 [VMOVDDUP]
 [VMOVDDUP]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 ymmreg,ymmrm                         \334\362\364\370\1\x12\110           AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \334\362\364\370\1\x12\110           AVX,SANDYBRIDGE
 xmmreg,mem64                         \334\362\370\1\x12\110               AVX,SANDYBRIDGE
 xmmreg,mem64                         \334\362\370\1\x12\110               AVX,SANDYBRIDGE
 xmmreg,xmmreg                        \334\362\370\1\x12\110               AVX,SANDYBRIDGE
 xmmreg,xmmreg                        \334\362\370\1\x12\110               AVX,SANDYBRIDGE
 
 
 [VMOVDQA]
 [VMOVDQA]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 ymmrm,ymmreg                         \361\362\364\370\1\x7F\101           AVX,SANDYBRIDGE
 ymmrm,ymmreg                         \361\362\364\370\1\x7F\101           AVX,SANDYBRIDGE
 xmmreg,xmmrm                         \361\362\370\1\x6F\110               AVX,SANDYBRIDGE
 xmmreg,xmmrm                         \361\362\370\1\x6F\110               AVX,SANDYBRIDGE
 xmmrm,xmmreg                         \361\362\370\1\x7F\101               AVX,SANDYBRIDGE
 xmmrm,xmmreg                         \361\362\370\1\x7F\101               AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \361\362\364\370\1\x6F\110           AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \361\362\364\370\1\x6F\110           AVX,SANDYBRIDGE
 
 
 [VMOVDQU]
 [VMOVDQU]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 ymmreg,ymmrm                         \333\362\364\370\1\x6F\110           AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \333\362\364\370\1\x6F\110           AVX,SANDYBRIDGE
 xmmreg,xmmrm                         \333\362\370\1\x6F\110               AVX,SANDYBRIDGE
 xmmreg,xmmrm                         \333\362\370\1\x6F\110               AVX,SANDYBRIDGE
 xmmrm,xmmreg                         \333\362\370\1\x7F\101               AVX,SANDYBRIDGE
 xmmrm,xmmreg                         \333\362\370\1\x7F\101               AVX,SANDYBRIDGE
@@ -3898,32 +3897,33 @@ reg32,xmmreg                         \362\370\1\x50\110                   AVX,SA
 reg64,ymmreg                         \362\364\370\1\x50\110               AVX,SANDYBRIDGE
 reg64,ymmreg                         \362\364\370\1\x50\110               AVX,SANDYBRIDGE
 
 
 [VMOVNTDQ]
 [VMOVNTDQ]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 mem128,xmmreg                        \361\362\370\1\xE7\101               AVX,SANDYBRIDGE
 mem128,xmmreg                        \361\362\370\1\xE7\101               AVX,SANDYBRIDGE
 mem256,ymmreg                        \361\362\364\370\1\xE7\101           AVX,SANDYBRIDGE
 mem256,ymmreg                        \361\362\364\370\1\xE7\101           AVX,SANDYBRIDGE
 
 
 [VMOVNTDQA]
 [VMOVNTDQA]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 xmmreg,mem128                        \361\362\371\1\x2A\110               AVX,SANDYBRIDGE
 xmmreg,mem128                        \361\362\371\1\x2A\110               AVX,SANDYBRIDGE
 ymmreg,mem256                        \361\362\364\371\1\x2A\110           AVX2
 ymmreg,mem256                        \361\362\364\371\1\x2A\110           AVX2
 
 
 [VMOVNTPD]
 [VMOVNTPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 mem256,ymmreg                        \361\362\364\370\1\x2B\101           AVX,SANDYBRIDGE
 mem256,ymmreg                        \361\362\364\370\1\x2B\101           AVX,SANDYBRIDGE
 mem128,xmmreg                        \361\362\370\1\x2B\101               AVX,SANDYBRIDGE
 mem128,xmmreg                        \361\362\370\1\x2B\101               AVX,SANDYBRIDGE
 
 
 [VMOVNTPS]
 [VMOVNTPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 mem128,ymmreg                        \362\364\370\1\x2B\101               AVX,SANDYBRIDGE
 mem128,ymmreg                        \362\364\370\1\x2B\101               AVX,SANDYBRIDGE
 mem128,xmmreg                        \362\370\1\x2B\101                   AVX,SANDYBRIDGE
 mem128,xmmreg                        \362\370\1\x2B\101                   AVX,SANDYBRIDGE
 
 
 [VMOVQ]
 [VMOVQ]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 rm64,xmmreg                          \361\362\363\370\1\x7E\101           AVX,SANDYBRIDGE
 rm64,xmmreg                          \361\362\363\370\1\x7E\101           AVX,SANDYBRIDGE
 xmmreg,rm64                          \361\362\363\370\1\x6E\110           AVX,SANDYBRIDGE
 xmmreg,rm64                          \361\362\363\370\1\x6E\110           AVX,SANDYBRIDGE
 
 
 [VMOVSD]
 [VMOVSD]
-(Ch_Wop3, Ch_Rop2, Ch_Rop1)
+; the three ops must be handled by the compiler internally
+(Ch_Wop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg,xmmreg                 \334\362\370\1\x10\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \334\362\370\1\x10\75\120            AVX,SANDYBRIDGE
 xmmreg,mem64                         \334\362\370\1\x10\110               AVX,SANDYBRIDGE
 xmmreg,mem64                         \334\362\370\1\x10\110               AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \334\362\370\1\x11\75\102            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \334\362\370\1\x11\75\102            AVX,SANDYBRIDGE
@@ -3940,21 +3940,22 @@ xmmreg,xmmrm                         \333\362\370\1\x12\110               AVX,SA
 ymmreg,ymmrm                         \333\362\364\370\1\x12\110           AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \333\362\364\370\1\x12\110           AVX,SANDYBRIDGE
 
 
 [VMOVSS]
 [VMOVSS]
-(Ch_Wop3, Ch_Rop2, Ch_Rop1)
+; the three ops must be handled by the compiler internally
+(Ch_Wop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x10\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x10\75\120            AVX,SANDYBRIDGE
 xmmreg,mem64                         \333\362\370\1\x10\110               AVX,SANDYBRIDGE
 xmmreg,mem64                         \333\362\370\1\x10\110               AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x11\75\102            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x11\75\102            AVX,SANDYBRIDGE
 mem64,xmmreg                         \333\362\370\1\x11\101               AVX,SANDYBRIDGE
 mem64,xmmreg                         \333\362\370\1\x11\101               AVX,SANDYBRIDGE
 
 
 [VMOVUPD]
 [VMOVUPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 xmmreg,xmmrm                         \361\362\370\1\x10\110               AVX,SANDYBRIDGE
 xmmreg,xmmrm                         \361\362\370\1\x10\110               AVX,SANDYBRIDGE
 xmmrm,xmmreg                         \361\362\370\1\x11\101               AVX,SANDYBRIDGE
 xmmrm,xmmreg                         \361\362\370\1\x11\101               AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \361\362\364\370\1\x10\110           AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \361\362\364\370\1\x10\110           AVX,SANDYBRIDGE
 ymmrm,ymmreg                         \361\362\364\370\1\x11\101           AVX,SANDYBRIDGE
 ymmrm,ymmreg                         \361\362\364\370\1\x11\101           AVX,SANDYBRIDGE
 
 
 [VMOVUPS]
 [VMOVUPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 xmmreg,xmmrm                         \362\370\1\x10\110                   AVX,SANDYBRIDGE
 xmmreg,xmmrm                         \362\370\1\x10\110                   AVX,SANDYBRIDGE
 xmmrm,xmmreg                         \362\370\1\x11\101                   AVX,SANDYBRIDGE
 xmmrm,xmmreg                         \362\370\1\x11\101                   AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \362\364\370\1\x10\110               AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \362\364\370\1\x10\110               AVX,SANDYBRIDGE
@@ -3986,11 +3987,11 @@ xmmreg,xmmreg,mem32                  \333\362\370\1\x59\75\120            AVX,SA
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x59\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x59\75\120            AVX,SANDYBRIDGE
 
 
 [VORPD]
 [VORPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\x56\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\x56\75\120            AVX,SANDYBRIDGE
 
 
 [VORPS]
 [VORPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,xmmrm                  \362\370\1\x56\75\120                AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \362\370\1\x56\75\120                AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \362\364\370\1\x56\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \362\364\370\1\x56\75\120            AVX,SANDYBRIDGE
 
 
@@ -4040,32 +4041,32 @@ xmmreg,xmmreg,xmmrm                  \361\362\370\1\xFE\75\120            AVX,SA
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xFE\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xFE\75\120        AVX2
 
 
 [VPADDQ]
 [VPADDQ]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD4\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD4\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xD4\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xD4\75\120        AVX2
 
 
 [VPADDSB]
 [VPADDSB]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xEC\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xEC\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xEC\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xEC\75\120        AVX2
 
 
 [VPADDSW]
 [VPADDSW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xED\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xED\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xED\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xED\75\120        AVX2
 
 
 [VPADDUSB]
 [VPADDUSB]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xDC\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xDC\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xDC\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xDC\75\120        AVX2
 
 
 [VPADDUSW]
 [VPADDUSW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xDD\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xDD\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xDD\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xDD\75\120        AVX2
 
 
 [VPADDW]
 [VPADDW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xFD\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xFD\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xFD\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xFD\75\120        AVX2
 
 
@@ -4075,12 +4076,12 @@ xmmreg,xmmreg,xmmrm,imm8             \361\362\372\1\x0F\75\120\27         AVX,SA
 ymmreg,ymmreg,ymmrm,imm8             \361\362\364\372\1\x0F\75\120\27     AVX2
 ymmreg,ymmreg,ymmrm,imm8             \361\362\364\372\1\x0F\75\120\27     AVX2
 
 
 [VPAND]
 [VPAND]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xDB\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xDB\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xDB\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xDB\75\120        AVX2
 
 
 [VPANDN]
 [VPANDN]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xDF\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xDF\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xDF\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xDF\75\120        AVX2
 
 
@@ -4414,42 +4415,42 @@ ymmreg,xmmreg                        \361\362\364\371\1\x34\110           AVX2
 
 
 
 
 [VPMULDQ]
 [VPMULDQ]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\371\1\x28\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\371\1\x28\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x28\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x28\75\120        AVX2
 
 
 [VPMULHRSW]
 [VPMULHRSW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\371\1\x0B\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\371\1\x0B\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x0B\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x0B\75\120        AVX2
 
 
 [VPMULHUW]
 [VPMULHUW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xE4\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xE4\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xE4\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xE4\75\120        AVX2
 
 
 [VPMULHW]
 [VPMULHW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xE5\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xE5\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xE5\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xE5\75\120        AVX2
 
 
 [VPMULLD]
 [VPMULLD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\371\1\x40\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\371\1\x40\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x40\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x40\75\120        AVX2
 
 
 [VPMULLW]
 [VPMULLW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD5\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD5\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xD5\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xD5\75\120        AVX2
 
 
 [VPMULUDQ]
 [VPMULUDQ]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xF4\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xF4\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xF4\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xF4\75\120        AVX2
 
 
 [VPOR]
 [VPOR]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xEB\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xEB\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xEB\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xEB\75\120        AVX2
 
 
@@ -4494,27 +4495,27 @@ xmmreg,xmmreg,xmmrm                  \361\362\371\1\x09\75\120            AVX,SA
 ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x09\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x09\75\120        AVX2
 
 
 [VPSLLD]
 [VPSLLD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,imm8                   \361\362\370\1\x72\74\216\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,imm8                   \361\362\370\1\x72\74\216\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xF2\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xF2\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x72\74\216\26     AVX2
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x72\74\216\26     AVX2
 ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xF2\75\120        AVX2
 ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xF2\75\120        AVX2
 
 
 [VPSLLDQ]
 [VPSLLDQ]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,imm8                   \361\362\370\1\x73\74\217\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,imm8                   \361\362\370\1\x73\74\217\26         AVX,SANDYBRIDGE
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x73\74\217\26     AVX2
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x73\74\217\26     AVX2
 
 
 
 
 [VPSLLQ]
 [VPSLLQ]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xF3\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xF3\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,imm8                   \361\362\370\1\x73\74\216\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,imm8                   \361\362\370\1\x73\74\216\26         AVX,SANDYBRIDGE
 ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xF3\75\120        AVX2
 ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xF3\75\120        AVX2
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x73\74\216\26     AVX2
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x73\74\216\26     AVX2
 
 
 [VPSLLW]
 [VPSLLW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,imm8                   \361\362\370\1\x71\74\216\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,imm8                   \361\362\370\1\x71\74\216\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xF1\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xF1\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x71\74\216\26     AVX2
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x71\74\216\26     AVX2
@@ -4522,21 +4523,21 @@ ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xF1\75\120        AVX2
 
 
 
 
 [VPSRAD]
 [VPSRAD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,imm8                   \361\362\370\1\x72\74\214\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,imm8                   \361\362\370\1\x72\74\214\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xE2\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xE2\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x72\74\214\26     AVX2
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x72\74\214\26     AVX2
 ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xE2\75\120        AVX2
 ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xE2\75\120        AVX2
 
 
 [VPSRAW]
 [VPSRAW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,imm8                   \361\362\370\1\x71\74\214\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,imm8                   \361\362\370\1\x71\74\214\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xE1\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xE1\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x71\74\214\26     AVX2
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x71\74\214\26     AVX2
 ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xE1\75\120        AVX2
 ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xE1\75\120        AVX2
 
 
 [VPSRLD]
 [VPSRLD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,imm8                   \361\362\370\1\x72\74\212\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,imm8                   \361\362\370\1\x72\74\212\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD2\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD2\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x72\74\212\26     AVX2
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x72\74\212\26     AVX2
@@ -4544,61 +4545,61 @@ ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xD2\75\120        AVX2
 
 
 
 
 [VPSRLDQ]
 [VPSRLDQ]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,imm8                   \361\362\370\1\x73\74\213\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,imm8                   \361\362\370\1\x73\74\213\26         AVX,SANDYBRIDGE
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x73\74\213\26     AVX2
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x73\74\213\26     AVX2
 
 
 [VPSRLQ]
 [VPSRLQ]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,imm8                   \361\362\370\1\x73\74\212\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,imm8                   \361\362\370\1\x73\74\212\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD3\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD3\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x73\74\212\26     AVX2
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x73\74\212\26     AVX2
 ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xD3\75\120        AVX2
 ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xD3\75\120        AVX2
 
 
 [VPSRLW]
 [VPSRLW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,imm8                   \361\362\370\1\x71\74\212\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,imm8                   \361\362\370\1\x71\74\212\26         AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD1\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD1\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x71\74\212\26     AVX2
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x71\74\212\26     AVX2
 ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xD1\75\120        AVX2
 ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xD1\75\120        AVX2
 
 
 [VPSUBB]
 [VPSUBB]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xF8\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xF8\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xF8\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xF8\75\120        AVX2
 
 
 [VPSUBD]
 [VPSUBD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xFA\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xFA\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xFA\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xFA\75\120        AVX2
 
 
 [VPSUBQ]
 [VPSUBQ]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xFB\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xFB\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xFB\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xFB\75\120        AVX2
 
 
 [VPSUBSB]
 [VPSUBSB]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xE8\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xE8\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xE8\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xE8\75\120        AVX2
 
 
 [VPSUBSW]
 [VPSUBSW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xE9\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xE9\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xE9\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xE9\75\120        AVX2
 
 
 [VPSUBUSB]
 [VPSUBUSB]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD8\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD8\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xD8\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xD8\75\120        AVX2
 
 
 [VPSUBUSW]
 [VPSUBUSW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD9\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD9\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xD9\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xD9\75\120        AVX2
 
 
 [VPSUBW]
 [VPSUBW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xF9\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xF9\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xF9\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xF9\75\120        AVX2
 
 
@@ -4648,7 +4649,7 @@ xmmreg,xmmreg,xmmrm                  \361\362\370\1\x61\75\120            AVX,SA
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\x61\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\x61\75\120        AVX2
 
 
 [VPXOR]
 [VPXOR]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xEF\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xEF\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xEF\75\120        AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\xEF\75\120        AVX2
 
 
@@ -4683,12 +4684,12 @@ xmmreg,xmmreg,mem32,imm8             \361\362\372\1\x0A\75\120\27         AVX,SA
 xmmreg,xmmreg,xmmreg,imm8            \361\362\372\1\x0A\75\120\27         AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg,imm8            \361\362\372\1\x0A\75\120\27         AVX,SANDYBRIDGE
 
 
 [VRSQRTPS]
 [VRSQRTPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 ymmreg,ymmrm                         \362\364\370\1\x52\110               AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \362\364\370\1\x52\110               AVX,SANDYBRIDGE
 xmmreg,xmmrm                         \362\370\1\x52\110                   AVX,SANDYBRIDGE
 xmmreg,xmmrm                         \362\370\1\x52\110                   AVX,SANDYBRIDGE
 
 
 [VRSQRTSS]
 [VRSQRTSS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,mem32                  \333\362\370\1\x52\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,mem32                  \333\362\370\1\x52\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x52\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x52\75\120            AVX,SANDYBRIDGE
 
 
@@ -4703,22 +4704,22 @@ xmmreg,xmmreg,xmmrm,imm8             \362\370\1\xC6\75\120\27             AVX,SA
 ymmreg,ymmreg,ymmrm,imm8             \362\364\370\1\xC6\75\120\27         AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm,imm8             \362\364\370\1\xC6\75\120\27         AVX,SANDYBRIDGE
 
 
 [VSQRTPD]
 [VSQRTPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 xmmreg,xmmrm                         \361\362\370\1\x51\110               AVX,SANDYBRIDGE
 xmmreg,xmmrm                         \361\362\370\1\x51\110               AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \361\362\364\370\1\x51\110           AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \361\362\364\370\1\x51\110           AVX,SANDYBRIDGE
 
 
 [VSQRTPS]
 [VSQRTPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 xmmreg,xmmrm                         \362\370\1\x51\110                   AVX,SANDYBRIDGE
 xmmreg,xmmrm                         \362\370\1\x51\110                   AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \362\364\370\1\x51\110               AVX,SANDYBRIDGE
 ymmreg,ymmrm                         \362\364\370\1\x51\110               AVX,SANDYBRIDGE
 
 
 [VSQRTSD]
 [VSQRTSD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg,mem64                  \334\362\370\1\x51\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,mem64                  \334\362\370\1\x51\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \334\362\370\1\x51\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \334\362\370\1\x51\75\120            AVX,SANDYBRIDGE
 
 
 [VSQRTSS]
 [VSQRTSS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,mem32                  \333\362\370\1\x51\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,mem32                  \333\362\370\1\x51\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x51\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x51\75\120            AVX,SANDYBRIDGE
 
 
@@ -4727,22 +4728,22 @@ xmmreg,xmmreg,xmmreg                 \333\362\370\1\x51\75\120            AVX,SA
 mem32                                \362\370\1\xAE\203                   AVX,SANDYBRIDGE
 mem32                                \362\370\1\xAE\203                   AVX,SANDYBRIDGE
 
 
 [VSUBPD]
 [VSUBPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\x5C\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\x5C\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\x5C\75\120        AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\x5C\75\120        AVX,SANDYBRIDGE
 
 
 [VSUBPS]
 [VSUBPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,xmmrm                  \362\370\1\x5C\75\120                AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \362\370\1\x5C\75\120                AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \362\364\370\1\x5C\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \362\364\370\1\x5C\75\120            AVX,SANDYBRIDGE
 
 
 [VSUBSD]
 [VSUBSD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,mem64                  \334\362\370\1\x5C\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,mem64                  \334\362\370\1\x5C\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \334\362\370\1\x5C\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \334\362\370\1\x5C\75\120            AVX,SANDYBRIDGE
 
 
 [VSUBSS]
 [VSUBSS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,mem32                  \333\362\370\1\x5C\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,mem32                  \333\362\370\1\x5C\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x5C\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmreg                 \333\362\370\1\x5C\75\120            AVX,SANDYBRIDGE
 
 
@@ -4787,12 +4788,12 @@ xmmreg,xmmreg,xmmrm                  \362\370\1\x14\75\120                AVX,SA
 ymmreg,ymmreg,ymmrm                  \362\364\370\1\x14\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \362\364\370\1\x14\75\120            AVX,SANDYBRIDGE
 
 
 [VXORPD]
 [VXORPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\x57\75\120            AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\x57\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\x57\75\120        AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\x57\75\120        AVX,SANDYBRIDGE
 
 
 [VXORPS]
 [VXORPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,xmmrm                  \362\370\1\x57\75\120                AVX,SANDYBRIDGE
 xmmreg,xmmreg,xmmrm                  \362\370\1\x57\75\120                AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \362\364\370\1\x57\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm                  \362\364\370\1\x57\75\120            AVX,SANDYBRIDGE
 
 
@@ -4809,36 +4810,40 @@ void                                 \362\370\1\x77                       AVX,SA
 ;*******************************************************************************
 ;*******************************************************************************
 
 
 [ANDN]
 [ANDN]
-(Ch_Wop1, Ch_Rop2, Ch_Rop3)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 reg32,reg32,rm32                      \362\371\1\xf2\75\120               BMI1
 reg32,reg32,rm32                      \362\371\1\xf2\75\120               BMI1
 reg64,reg64,rm64                      \362\363\371\1\xf2\75\120           BMI1,X86_64
 reg64,reg64,rm64                      \362\363\371\1\xf2\75\120           BMI1,X86_64
 
 
 [BEXTR]
 [BEXTR]
-(Ch_Wop1, Ch_Rop2, Ch_Rop3)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 reg32,rm32,reg32                      \362\371\1\xf7\76\110               BMI1
 reg32,rm32,reg32                      \362\371\1\xf7\76\110               BMI1
 reg64,rm64,reg64                      \362\363\371\1\xf7\76\110           BMI1,X86_64
 reg64,rm64,reg64                      \362\363\371\1\xf7\76\110           BMI1,X86_64
 
 
+[TZCNT]
+(Ch_Wop2, Ch_WFlags, Ch_Rop1)
+reg16|32|64,regmem                    \320\333\2\x0F\xBC\110              BMI1,SM
+
 ;*******************************************************************************
 ;*******************************************************************************
 ;********** BMI2 ***************************************************************
 ;********** BMI2 ***************************************************************
 ;*******************************************************************************
 ;*******************************************************************************
 
 
 [RORX]
 [RORX]
-(Ch_Wop1, Ch_Rop2, Ch_None)
+(Ch_Rop1, Ch_Wop2, Ch_None)
 reg32,rm32,imm8                      \334\362\372\1\xf0\110\26            BMI2
 reg32,rm32,imm8                      \334\362\372\1\xf0\110\26            BMI2
 reg64,rm64,imm8                      \334\362\363\372\1\xf0\110\26        BMI2,X86_64
 reg64,rm64,imm8                      \334\362\363\372\1\xf0\110\26        BMI2,X86_64
 
 
 [SARX]
 [SARX]
-(Ch_Wop1, Ch_Rop2, Ch_Rop3)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 reg32,rm32,reg32                      \333\362\371\1\xf7\76\110           BMI2
 reg32,rm32,reg32                      \333\362\371\1\xf7\76\110           BMI2
 reg64,rm64,reg64                      \333\362\363\371\1\xf7\76\110       BMI2,X86_64
 reg64,rm64,reg64                      \333\362\363\371\1\xf7\76\110       BMI2,X86_64
 
 
 [SHLX]
 [SHLX]
-(Ch_Wop1, Ch_Rop2, Ch_Rop3)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 reg32,rm32,reg32                      \361\362\371\1\xf7\76\110           BMI2
 reg32,rm32,reg32                      \361\362\371\1\xf7\76\110           BMI2
 reg64,rm64,reg64                      \361\362\363\371\1\xf7\76\110       BMI2,X86_64
 reg64,rm64,reg64                      \361\362\363\371\1\xf7\76\110       BMI2,X86_64
 
 
 [SHRX]
 [SHRX]
-(Ch_Wop1, Ch_Rop2, Ch_Rop3)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
 reg32,rm32,reg32                      \334\362\371\1\xf7\76\110           BMI2
 reg32,rm32,reg32                      \334\362\371\1\xf7\76\110           BMI2
 reg64,rm64,reg64                      \334\362\363\371\1\xf7\76\110       BMI2,X86_64
 reg64,rm64,reg64                      \334\362\363\371\1\xf7\76\110       BMI2,X86_64
 
 
@@ -4951,5 +4956,403 @@ xmmreg,xmmreg,xmmrm                  \361\362\371\1\x45\75\120            AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\363\364\371\1\x45\75\120    AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\363\364\371\1\x45\75\120    AVX2
 xmmreg,xmmreg,xmmrm                  \361\362\363\371\1\x45\75\120        AVX2
 xmmreg,xmmreg,xmmrm                  \361\362\363\371\1\x45\75\120        AVX2
 
 
+[VGATHERDPD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem64,xmmreg                 \361\362\363\371\1\x92\76\110        AVX2
+ymmreg,xmem64,ymmreg                 \361\362\363\364\371\1\x92\76\110    AVX2
+
+[VGATHERDPS]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem32,xmmreg                 \361\362\371\1\x92\76\110            AVX2
+ymmreg,ymem32,ymmreg                 \361\362\364\371\1\x92\76\110        AVX2
+
+[VGATHERQPD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem64,xmmreg                 \361\362\363\371\1\x93\76\110        AVX2
+ymmreg,ymem64,ymmreg                 \361\362\363\364\371\1\x93\76\110    AVX2
+
+[VGATHERQPS]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem32,xmmreg                 \361\362\371\1\x93\76\110            AVX2
+xmmreg,ymem32,xmmreg                 \361\362\364\371\1\x93\76\110        AVX2
+
+[VPGATHERDD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem32,xmmreg                 \361\362\371\1\x90\76\110            AVX2
+ymmreg,ymem32,ymmreg                 \361\362\364\371\1\x90\76\110        AVX2
+
+[VPGATHERDQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem64,xmmreg                 \361\362\363\371\1\x90\76\110        AVX2
+ymmreg,xmem64,ymmreg                 \361\362\363\364\371\1\x90\76\110    AVX2
+
+[VPGATHERQD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem32,xmmreg                 \361\362\371\1\x91\76\110            AVX2
+xmmreg,ymem32,xmmreg                 \361\362\364\371\1\x91\76\110        AVX2
+
+[VPGATHERQQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem64,xmmreg                 \361\362\363\371\1\x91\76\110        AVX2
+ymmreg,ymem64,ymmreg                 \361\362\363\364\371\1\x91\76\110    AVX2
+
+;*******************************************************************************
+;********** NEC V20/V30 ********************************************************
+;*******************************************************************************
+
+[ADD4S]
+(Ch_All, Ch_None, Ch_None)
+void                                  \2\x0F\x20                          NEC,16BITONLY
+
+[BRKEM]
+(Ch_All, Ch_None, Ch_None)
+imm                                   \2\x0F\xFF\24                       NEC,SB,16BITONLY
+
+[CLR1]
+(Ch_Mop2, Ch_Rop1, Ch_None)
+rm8,reg_cl                            \2\x0F\x12\200                      NEC,16BITONLY
+rm16,reg_cl                           \2\x0F\x13\200                      NEC,16BITONLY
+rm8,imm                               \2\x0F\x1A\200\25                   NEC,SB,16BITONLY
+rm16,imm                              \2\x0F\x1B\200\25                   NEC,SW,16BITONLY
+
+[CMP4S]
+(Ch_All, Ch_None, Ch_None)
+void                                  \2\x0F\x26                          NEC,16BITONLY
+
+[EXT]
+(Ch_All, Ch_None, Ch_None)
+reg8,reg8                             \2\x0F\x33\101                      NEC,16BITONLY
+reg8,imm                              \2\x0F\x3B\200\25                   NEC,SB,16BITONLY
+
+;[FPO2]
+
+[INS]
+(Ch_All, Ch_None, Ch_None)
+reg8,reg8                             \2\x0F\x31\101                      NEC,16BITONLY
+reg8,imm                              \2\x0F\x39\200\25                   NEC,SB,16BITONLY
+
+[NOT1]
+(Ch_Mop2, Ch_Rop1, Ch_None)
+rm8,reg_cl                            \2\x0F\x16\200                      NEC,16BITONLY
+rm16,reg_cl                           \2\x0F\x17\200                      NEC,16BITONLY
+rm8,imm                               \2\x0F\x1E\200\25                   NEC,SB,16BITONLY
+rm16,imm                              \2\x0F\x1F\200\25                   NEC,SW,16BITONLY
+
+[REPC]
+(Ch_RWECX, Ch_RWFlags, Ch_None)
+void                                  \1\x65                              NEC,PRE,16BITONLY
+
+[REPNC]
+(Ch_RWECX, Ch_RWFlags, Ch_None)
+void                                  \1\x64                              NEC,PRE,16BITONLY
 
 
+[ROL4]
+(Ch_Mop1, Ch_RWEAX, Ch_None)
+rm8                                   \2\x0F\x28\200                      NEC,16BITONLY
+
+[ROR4]
+(Ch_Mop1, Ch_RWEAX, Ch_None)
+rm8                                   \2\x0F\x2A\200                      NEC,16BITONLY
+
+[SET1]
+(Ch_Mop2, Ch_Rop1, Ch_None)
+rm8,reg_cl                            \2\x0F\x14\200                      NEC,16BITONLY
+rm16,reg_cl                           \2\x0F\x15\200                      NEC,16BITONLY
+rm8,imm                               \2\x0F\x1C\200\25                   NEC,SB,16BITONLY
+rm16,imm                              \2\x0F\x1D\200\25                   NEC,SW,16BITONLY
+
+[SUB4S]
+(Ch_All, Ch_None, Ch_None)
+void                                  \2\x0F\x22                          NEC,16BITONLY
+
+[TEST1]
+(Ch_WFlags, Ch_Rop1, Ch_Rop2)
+rm8,reg_cl                            \2\x0F\x10\200                      NEC,16BITONLY
+rm16,reg_cl                           \2\x0F\x11\200                      NEC,16BITONLY
+rm8,imm                               \2\x0F\x18\200\25                   NEC,SB,16BITONLY
+rm16,imm                              \2\x0F\x19\200\25                   NEC,SW,16BITONLY
+
+;*******************************************************************************
+;********** FMA ****************************************************************
+;*******************************************************************************
+[VFMADD132PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\x98\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\x98\75\120    FMA
+
+[VFMADD213PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xA8\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\xA8\75\120    FMA
+
+[VFMADD231PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xB8\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\xB8\75\120    FMA
+
+[VFMADDPD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm,xmmreg           \361\362\372\1\x69\75\120\367        FMA4
+xmmreg,xmmreg,xmmreg,xmmrm           \361\362\372\363\1\x69\75\130\366    FMA4
+
+[VFMADD132PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\x98\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x98\75\120        FMA
+
+[VFMADD213PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xA8\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\xA8\75\120        FMA
+
+[VFMADD231PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xB8\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\xB8\75\120        FMA
+
+[VFMADD132SD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\x99\75\120        FMA
+
+[VFMADD213SD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xa9\75\120        FMA
+
+[VFMADD231SD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xb9\75\120        FMA
+
+[VFMADD132SS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\x99\75\120            FMA
+
+[VFMADD213SS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xA9\75\120            FMA
+
+[VFMADD231SS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xb9\75\120            FMA
+
+[VFMADDSUB132PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\x96\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\x96\75\120    FMA
+
+[VFMADDSUB213PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xA6\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\xA6\75\120    FMA
+
+[VFMADDSUB231PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xB6\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\xB6\75\120    FMA
+
+[VFMADDSUB132PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\x96\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x96\75\120        FMA
+
+[VFMADDSUB213PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xA6\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\xA6\75\120        FMA
+
+[VFMADDSUB231PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xB6\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\xB6\75\120        FMA
+
+[VFMSUBADD132PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\x97\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\x97\75\120    FMA
+
+[VFMSUBADD213PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xA7\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\xA7\75\120    FMA
+
+[VFMSUBADD231PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xB7\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\xB7\75\120    FMA
+
+[VFMSUBADD132PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\x97\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x97\75\120        FMA
+
+[VFMSUBADD213PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xA7\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\xA7\75\120        FMA
+
+[VFMSUBADD231PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xB7\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\xB7\75\120        FMA
+
+[VFMSUB132PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\x9A\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\x9A\75\120    FMA
+
+[VFMSUB213PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xAA\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\xAA\75\120    FMA
+
+[VFMSUB231PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xBA\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\xBA\75\120    FMA
+
+[VFMSUB132PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\x9A\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x9A\75\120        FMA
+
+[VFMSUB213PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xAA\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\xAA\75\120        FMA
+
+[VFMSUB231PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xBA\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\xBA\75\120        FMA
+
+[VFMSUB132SD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\x9B\75\120        FMA
+
+[VFMSUB213SD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xAB\75\120        FMA
+
+[VFMSUB231SD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xBB\75\120        FMA
+
+[VFMSUB132SS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\x9B\75\120            FMA
+
+[VFMSUB213SS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xAB\75\120            FMA
+
+[VFMSUB231SS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xBB\75\120            FMA
+
+[VFNMADD132PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\x9C\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\x9C\75\120    FMA
+
+[VFNMADD213PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xAC\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\xAC\75\120    FMA
+
+[VFNMADD231PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xBC\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\xBC\75\120    FMA
+
+[VFNMADD132PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\x9C\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x9C\75\120        FMA
+
+[VFNMADD213PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xAC\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\xAC\75\120        FMA
+
+[VFNMADD231PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xBC\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\xBC\75\120        FMA
+
+[VFNMADD132SD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\x9D\75\120        FMA
+
+[VFNMADD213SD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xAD\75\120        FMA
+
+[VFNMADD231SD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xBD\75\120        FMA
+
+[VFNMADD132SS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\x9D\75\120            FMA
+
+[VFNMADD213SS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xAD\75\120            FMA
+
+[VFNMADD231SS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xBD\75\120            FMA
+
+[VFNMSUB132PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\x9E\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\x9E\75\120    FMA
+
+[VFNMSUB213PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xAE\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\xAE\75\120    FMA
+
+[VFNMSUB231PD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xBE\75\120        FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\xBE\75\120    FMA
+
+[VFNMSUB132PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\x9E\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x9E\75\120        FMA
+
+[VFNMSUB213PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xAE\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\xAE\75\120        FMA
+
+[VFNMSUB231PS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xBE\75\120            FMA
+ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\xBE\75\120        FMA
+
+[VFNMSUB132SD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\x9F\75\120        FMA
+
+[VFNMSUB213SD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xAF\75\120        FMA
+
+[VFNMSUB231SD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xBF\75\120        FMA
+
+[VFNMSUB132SS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\x9F\75\120            FMA
+
+[VFNMSUB213SS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xAF\75\120            FMA
+
+[VFNMSUB231SS]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                  \361\362\371\1\xBF\75\120            FMA
 
 

+ 32 - 24
compiler/x86/x86reg.dat

@@ -6,6 +6,14 @@
 ;
 ;
 ; For stab/dwarf numbers see gdb/i386-tdep.c and gdb/amd64-tdep.c
 ; For stab/dwarf numbers see gdb/i386-tdep.c and gdb/amd64-tdep.c
 ;
 ;
+; NOTE: registers are numbered so that 3 LSB of superregister number matches the opcode.
+; Exceptions are:
+;  - high byte registers (AH/CH/DH/BH)
+;  - NR_FLAGS, NR_EIP and NR_RIP.
+;  - We use order [eax ecx edx ebx esi edi ebp esp], while the correct
+;    one is       [eax ecx edx ebx esp ebp esi edi],
+;    this is due to asm optimizer coding style
+
 NR_NO,$00000000,INVALID,INVALID,INVALID,INVALID,-1,-1,-1,OT_NONE,0
 NR_NO,$00000000,INVALID,INVALID,INVALID,INVALID,-1,-1,-1,OT_NONE,0
 NR_AL,$01010000,al,%al,al,al,0,0,0,OT_REG_AL,0
 NR_AL,$01010000,al,%al,al,al,0,0,0,OT_REG_AL,0
 NR_AH,$01020000,ah,%ah,ah,ah,0,0,0,OT_REG8,4
 NR_AH,$01020000,ah,%ah,ah,ah,0,0,0,OT_REG8,4
@@ -76,33 +84,33 @@ NR_R15L,$0101000f,r15b,%r15b,r15b,r15b,-1,-1,15,OT_REG8,7,64
 NR_R15W,$0103000f,r15w,%r15w,r15w,r15w,-1,-1,15,OT_REG16,7,64
 NR_R15W,$0103000f,r15w,%r15w,r15w,r15w,-1,-1,15,OT_REG16,7,64
 NR_R15D,$0104000f,r15d,%r15d,r15d,r15d,-1,-1,15,OT_REG32,7,64
 NR_R15D,$0104000f,r15d,%r15d,r15d,r15d,-1,-1,15,OT_REG32,7,64
 
 
-; EIP is needed for DWARF call frame info return address (RA)
-NR_RIP,$05050000,rip,%rip,rip,rip,-1,8,16,OT_NONE,0,64
-NR_EIP,$05040000,eip,%eip,eip,eip,-1,8,16,OT_NONE,0
+NR_ES,$05000000,es,%es,es,es,-1,-1,-1,OT_REG_DESS,0
 NR_CS,$05000001,cs,%cs,cs,cs,-1,-1,-1,OT_REG_CS,1
 NR_CS,$05000001,cs,%cs,cs,cs,-1,-1,-1,OT_REG_CS,1
-NR_DS,$05000002,ds,%ds,ds,ds,-1,-1,-1,OT_REG_DESS,3
-NR_ES,$05000003,es,%es,es,es,-1,-1,-1,OT_REG_DESS,0
-NR_SS,$05000004,ss,%ss,ss,ss,-1,-1,-1,OT_REG_DESS,2
-NR_FS,$05000005,fs,%fs,fs,fs,-1,-1,-1,OT_REG_FSGS,4
-NR_GS,$05000006,gs,%gs,gs,gs,-1,-1,-1,OT_REG_FSGS,5
+NR_SS,$05000002,ss,%ss,ss,ss,-1,-1,-1,OT_REG_DESS,2
+NR_DS,$05000003,ds,%ds,ds,ds,-1,-1,-1,OT_REG_DESS,3
+NR_FS,$05000004,fs,%fs,fs,fs,-1,-1,-1,OT_REG_FSGS,4
+NR_GS,$05000005,gs,%gs,gs,gs,-1,-1,-1,OT_REG_FSGS,5
 
 
-NR_DR0,$05000007,dr0,%dr0,dr0,dr0,-1,-1,-1,OT_REG_DREG,0
-NR_DR1,$05000008,dr1,%dr1,dr1,dr1,-1,-1,-1,OT_REG_DREG,1
-NR_DR2,$05000009,dr2,%dr2,dr2,dr2,-1,-1,-1,OT_REG_DREG,2
-NR_DR3,$0500000a,dr3,%dr3,dr3,dr3,-1,-1,-1,OT_REG_DREG,3
-NR_DR6,$0500000b,dr6,%dr6,dr6,dr6,-1,-1,-1,OT_REG_DREG,6
-NR_DR7,$0500000c,dr7,%dr7,dr7,dr7,-1,-1,-1,OT_REG_DREG,7
-NR_CR0,$0500000d,cr0,%cr0,cr0,cr0,-1,-1,-1,OT_REG_CREG,0
-NR_CR2,$0500000e,cr2,%cr2,cr2,cr2,-1,-1,-1,OT_REG_CREG,2
-NR_CR3,$0500000f,cr3,%cr3,cr3,cr3,-1,-1,-1,OT_REG_CREG,3
-NR_CR4,$05000010,cr4,%cr4,cr4,cr4,-1,-1,-1,OT_REG_CR4,4
-NR_TR3,$05000011,tr3,%tr3,tr3,tr3,-1,-1,-1,OT_REG_TREG,3
-NR_TR4,$05000012,tr4,%tr4,tr4,tr4,-1,-1,-1,OT_REG_TREG,4
-NR_TR5,$05000013,tr5,%tr5,tr5,tr5,-1,-1,-1,OT_REG_TREG,5
-NR_TR6,$05000014,tr6,%tr6,tr6,tr6,-1,-1,-1,OT_REG_TREG,6
-NR_TR7,$05000015,tr7,%tr7,tr7,tr7,-1,-1,-1,OT_REG_TREG,7
+NR_FLAGS,$05000006,flags,%flags,flags,flags,-1,-1,-1,OT_NONE,0
+; EIP is needed for DWARF call frame info return address (RA)
+NR_RIP,$05050007,rip,%rip,rip,rip,-1,8,16,OT_NONE,0,64
+NR_EIP,$05040007,eip,%eip,eip,eip,-1,8,16,OT_NONE,0
 
 
-NR_FLAGS,$05000016,flags,%flags,flags,flags,-1,-1,-1,OT_NONE,0
+NR_DR0,$05000008,dr0,%dr0,dr0,dr0,-1,-1,-1,OT_REG_DREG,0
+NR_DR1,$05000009,dr1,%dr1,dr1,dr1,-1,-1,-1,OT_REG_DREG,1
+NR_DR2,$0500000a,dr2,%dr2,dr2,dr2,-1,-1,-1,OT_REG_DREG,2
+NR_DR3,$0500000b,dr3,%dr3,dr3,dr3,-1,-1,-1,OT_REG_DREG,3
+NR_DR6,$0500000d,dr6,%dr6,dr6,dr6,-1,-1,-1,OT_REG_DREG,6
+NR_DR7,$0500000e,dr7,%dr7,dr7,dr7,-1,-1,-1,OT_REG_DREG,7
+NR_CR0,$05000010,cr0,%cr0,cr0,cr0,-1,-1,-1,OT_REG_CREG,0
+NR_CR2,$05000012,cr2,%cr2,cr2,cr2,-1,-1,-1,OT_REG_CREG,2
+NR_CR3,$05000013,cr3,%cr3,cr3,cr3,-1,-1,-1,OT_REG_CREG,3
+NR_CR4,$05000014,cr4,%cr4,cr4,cr4,-1,-1,-1,OT_REG_CR4,4
+NR_TR3,$0500001b,tr3,%tr3,tr3,tr3,-1,-1,-1,OT_REG_TREG,3
+NR_TR4,$0500001c,tr4,%tr4,tr4,tr4,-1,-1,-1,OT_REG_TREG,4
+NR_TR5,$0500001d,tr5,%tr5,tr5,tr5,-1,-1,-1,OT_REG_TREG,5
+NR_TR6,$0500001e,tr6,%tr6,tr6,tr6,-1,-1,-1,OT_REG_TREG,6
+NR_TR7,$0500001f,tr7,%tr7,tr7,tr7,-1,-1,-1,OT_REG_TREG,7
 
 
 NR_ST0,$02000000,st(0),%st(0),st(0),st0,12,11,33,OT_FPU0,0
 NR_ST0,$02000000,st(0),%st(0),st(0),st0,12,11,33,OT_FPU0,0
 NR_ST1,$02000001,st(1),%st(1),st(1),st1,13,12,34,OT_FPUREG,1
 NR_ST1,$02000001,st(1),%st(1),st(1),st1,13,12,34,OT_FPUREG,1

이 변경점에서 너무 많은 파일들이 변경되어 몇몇 파일들은 표시되지 않았습니다.