Browse Source

merge with trunk

git-svn-id: branches/tg74/avx2@28411 -
tg74 11 years ago
parent
commit
e9f2dcabc5

+ 1 - 1
.gitattributes

@@ -806,7 +806,6 @@ compiler/x86_64/r8664int.inc svneol=native#text/plain
 compiler/x86_64/r8664iri.inc svneol=native#text/plain
 compiler/x86_64/r8664iri.inc svneol=native#text/plain
 compiler/x86_64/r8664nor.inc svneol=native#text/plain
 compiler/x86_64/r8664nor.inc svneol=native#text/plain
 compiler/x86_64/r8664num.inc svneol=native#text/plain
 compiler/x86_64/r8664num.inc svneol=native#text/plain
-compiler/x86_64/r8664op.inc svneol=native#text/plain
 compiler/x86_64/r8664ot.inc svneol=native#text/plain
 compiler/x86_64/r8664ot.inc svneol=native#text/plain
 compiler/x86_64/r8664rni.inc svneol=native#text/plain
 compiler/x86_64/r8664rni.inc svneol=native#text/plain
 compiler/x86_64/r8664sri.inc svneol=native#text/plain
 compiler/x86_64/r8664sri.inc svneol=native#text/plain
@@ -815,6 +814,7 @@ compiler/x86_64/r8664std.inc svneol=native#text/plain
 compiler/x86_64/rax64att.pas svneol=native#text/plain
 compiler/x86_64/rax64att.pas svneol=native#text/plain
 compiler/x86_64/rax64int.pas svneol=native#text/plain
 compiler/x86_64/rax64int.pas svneol=native#text/plain
 compiler/x86_64/rgcpu.pas svneol=native#text/plain
 compiler/x86_64/rgcpu.pas svneol=native#text/plain
+compiler/x86_64/symcpu.pas svneol=native#text/plain
 compiler/x86_64/win64unw.pas svneol=native#text/plain
 compiler/x86_64/win64unw.pas svneol=native#text/plain
 compiler/x86_64/x8664ats.inc svneol=native#text/plain
 compiler/x86_64/x8664ats.inc svneol=native#text/plain
 compiler/x86_64/x8664att.inc svneol=native#text/plain
 compiler/x86_64/x8664att.inc svneol=native#text/plain

+ 209 - 1
compiler/x86_64/aoptcpu.pas

@@ -63,6 +63,94 @@ begin
     end;
     end;
 end;
 end;
 
 
+
+function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
+  begin
+    result :=
+      (instr.typ = ait_instruction) and
+      (taicpu(instr).opcode = op) and
+      ((opsize = []) or (taicpu(instr).opsize in opsize));
+  end;
+
+
+function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
+  begin
+    result :=
+      (instr.typ = ait_instruction) and
+      ((taicpu(instr).opcode = op1) or
+       (taicpu(instr).opcode = op2)
+      ) and
+      ((opsize = []) or (taicpu(instr).opsize in opsize));
+  end;
+
+
+function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
+  begin
+    result :=
+      (instr.typ = ait_instruction) and
+      ((taicpu(instr).opcode = op1) or
+       (taicpu(instr).opcode = op2) or
+       (taicpu(instr).opcode = op3)
+      ) and
+      ((opsize = []) or (taicpu(instr).opsize in opsize));
+  end;
+
+
+function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
+  begin
+    result := (oper.typ = top_reg) and (oper.reg = reg);
+  end;
+
+
+function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
+  begin
+    result := (oper.typ = top_const) and (oper.val = a);
+  end;
+
+function refsequal(const r1, r2: treference): boolean;
+  begin
+    refsequal :=
+      (r1.offset = r2.offset) and
+      (r1.segment = r2.segment) and (r1.base = r2.base) and
+      (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
+      (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
+      (r1.relsymbol = r2.relsymbol);
+  end;
+
+
+function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
+  begin
+    result := oper1.typ = oper2.typ;
+
+    if result then
+      case oper1.typ of
+        top_const:
+          Result:=oper1.val = oper2.val;
+        top_reg:
+          Result:=oper1.reg = oper2.reg;
+        top_ref:
+          Result:=RefsEqual(oper1.ref^, oper2.ref^);
+        else
+          internalerror(2013102801);
+      end
+  end;
+
+
+function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
+  begin
+   Result:=(ref.offset=0) and
+     (ref.scalefactor in [0,1]) and
+     (ref.segment=NR_NO) and
+     (ref.symbol=nil) and
+     (ref.relsymbol=nil) and
+     ((base=NR_INVALID) or
+      (ref.base=base)) and
+     ((index=NR_INVALID) or
+      (ref.index=index));
+  end;
+
+
+
 function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
 function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
 var
 var
   next1: tai;
   next1: tai;
@@ -99,7 +187,66 @@ begin
               asml.remove(p);
               asml.remove(p);
               p.Free;
               p.Free;
               p:=hp1;
               p:=hp1;
-            end;
+            end
+          else if (taicpu(p).oper[0]^.typ = top_const) and
+            (taicpu(p).oper[1]^.typ = top_reg) and
+            GetNextInstruction(p, hp1) and
+            MatchInstruction(hp1,A_MOVZX,[]) and
+            (taicpu(hp1).oper[0]^.typ = top_reg) and
+            MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
+            (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
+             (((taicpu(p).opsize=S_W) and
+               (taicpu(hp1).opsize=S_BW)) or
+              ((taicpu(p).opsize=S_L) and
+               (taicpu(hp1).opsize in [S_WL,S_BL])) or
+               ((taicpu(p).opsize=S_Q) and
+               (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
+              ) then
+                begin
+                  if (((taicpu(hp1).opsize) in [S_BW,S_BL,S_BQ]) and
+                      ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)) or
+                     (((taicpu(hp1).opsize) in [S_WL,S_WQ]) and
+                      ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val)) or
+                     (((taicpu(hp1).opsize)=S_LQ) and
+                      ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
+                     ) then
+                     begin
+                       if (cs_asm_source in current_settings.globalswitches) then
+                         asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,AndMovzToAnd')),p);
+                       asml.remove(hp1);
+                       hp1.free;
+                     end;
+                end
+          else if (taicpu(p).oper[0]^.typ = top_const) and
+            (taicpu(p).oper[1]^.typ = top_reg) and
+            GetNextInstruction(p, hp1) and
+            MatchInstruction(hp1,A_MOVSX,A_MOVSXD,[]) and
+            (taicpu(hp1).oper[0]^.typ = top_reg) and
+            MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
+            (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
+             (((taicpu(p).opsize=S_W) and
+               (taicpu(hp1).opsize=S_BW)) or
+              ((taicpu(p).opsize=S_L) and
+               (taicpu(hp1).opsize in [S_WL,S_BL])) or
+               ((taicpu(p).opsize=S_Q) and
+               (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
+              ) then
+                begin
+                  if (((taicpu(hp1).opsize) in [S_BW,S_BL,S_BQ]) and
+                      ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)) or
+                     (((taicpu(hp1).opsize) in [S_WL,S_WQ]) and
+                      ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val)) or
+                     (((taicpu(hp1).opsize)=S_LQ) and
+                      ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
+                     ) then
+                     begin
+                       if (cs_asm_source in current_settings.globalswitches) then
+                         asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,AndMovsxToAnd')),p);
+                       asml.remove(hp1);
+                       hp1.free;
+                     end;
+                end;
+
 (*                      else
 (*                      else
   {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
   {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
   jump, but only if it's a conditional jump (PFV) }
   jump, but only if it's a conditional jump (PFV) }
@@ -134,6 +281,41 @@ begin
                       hp1.free;
                       hp1.free;
                     end;
                     end;
               end
               end
+            { Next instruction is also a MOV ? }
+            else if GetNextIntruction_p and
+              MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
+              begin
+                if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
+                   (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
+                    {mov reg1, mem1     or     mov mem1, reg1
+                     mov mem2, reg2            mov reg2, mem2}
+                  begin
+                    if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
+                      {mov reg1, mem1     or     mov mem1, reg1
+                       mov mem2, reg1            mov reg2, mem1}
+                      begin
+                        if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
+                          { Removes the second statement from
+                            mov reg1, mem1/reg2
+                            mov mem1/reg2, reg1 }
+                          begin
+                            { if (taicpu(p).oper[0]^.typ = top_reg) then
+                              AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs); }
+                            if (cs_asm_source in current_settings.globalswitches) then
+                              asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,MovMov2Mov1')),p);
+                            asml.remove(hp1);
+                            hp1.free;
+                          end;
+                      end
+                    else if (taicpu(p).oper[1]^.typ=top_ref) and
+                      OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
+                      begin
+                        taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
+                        if (cs_asm_source in current_settings.globalswitches) then
+                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,MovMov2MovMov1')),p);
+                      end;
+                  end
+              end
             else if (taicpu(p).oper[1]^.typ = top_reg) and
             else if (taicpu(p).oper[1]^.typ = top_reg) and
               GetNextIntruction_p and
               GetNextIntruction_p and
               (hp1.typ = ait_instruction) and
               (hp1.typ = ait_instruction) and
@@ -482,6 +664,32 @@ begin
                 end;
                 end;
             end;
             end;
           end;
           end;
+        A_VDIVSD,
+        A_VDIVSS,
+        A_VSUBSD,
+        A_VSUBSS,
+        A_VMULSD,
+        A_VMULSS,
+        A_VADDSD,
+        A_VADDSS:
+          begin
+            if GetNextInstruction(p,hp1) and
+              { we mix single and double opperations here because we assume that the compiler
+                generates vmovapd only after double operations and vmovaps only after single operations }
+              MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
+              MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
+              (taicpu(hp1).oper[1]^.typ=top_reg) then
+              begin
+                CopyUsedRegs(TmpUsedRegs);
+                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
+                If not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
+                  begin
+                    taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
+                    asml.Remove(hp1);
+                    hp1.Free;
+                  end;
+              end;
+          end;
         end;
         end;
       end;
       end;
     end;
     end;

+ 14 - 15
compiler/x86_64/cgcpu.pas

@@ -57,12 +57,12 @@ unit cgcpu;
     uses
     uses
        globtype,globals,verbose,systems,cutils,cclasses,
        globtype,globals,verbose,systems,cutils,cclasses,
        symsym,symtable,defutil,paramgr,fmodule,cpupi,
        symsym,symtable,defutil,paramgr,fmodule,cpupi,
-       rgobj,tgobj,rgcpu;
+       rgobj,tgobj,rgcpu,ncgutil;
 
 
 
 
     procedure Tcgx86_64.init_register_allocators;
     procedure Tcgx86_64.init_register_allocators;
       const
       const
-        win64_saved_std_regs : array[0..6] of tsuperregister = (RS_RBX,RS_RDI,RS_RSI,RS_R12,RS_R13,RS_R14,RS_R15);
+        win64_saved_std_regs : array[0..7] of tsuperregister = (RS_RBX,RS_RDI,RS_RSI,RS_R12,RS_R13,RS_R14,RS_R15,RS_RBP);
         others_saved_std_regs : array[0..4] of tsuperregister = (RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15);
         others_saved_std_regs : array[0..4] of tsuperregister = (RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15);
         saved_regs_length : array[boolean] of longint = (5,7);
         saved_regs_length : array[boolean] of longint = (5,7);
 
 
@@ -96,8 +96,16 @@ unit cgcpu;
               end;
               end;
           end;
           end;
         if target_info.system=system_x86_64_win64 then
         if target_info.system=system_x86_64_win64 then
-          rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_R8,RS_R9,RS_R10,
-            RS_R11,RS_RBX,RS_RSI,RS_RDI,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[])
+          begin
+            if (cs_userbp in current_settings.optimizerswitches) and assigned(current_procinfo) and (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
+              begin
+                rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_R8,RS_R9,RS_R10,
+                  RS_R11,RS_RBX,RS_RSI,RS_RDI,RS_R12,RS_R13,RS_R14,RS_R15,RS_RBP],first_int_imreg,[]);
+              end
+            else
+              rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_R8,RS_R9,RS_R10,
+                RS_R11,RS_RBX,RS_RSI,RS_RDI,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[])
+          end
         else
         else
           rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_RSI,RS_RDI,RS_R8,
           rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_RSI,RS_RDI,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[]);
             RS_R9,RS_R10,RS_R11,RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[]);
@@ -140,7 +148,6 @@ unit cgcpu;
         frame_offset: longint;
         frame_offset: longint;
         suppress_endprologue: boolean;
         suppress_endprologue: boolean;
         stackmisalignment: longint;
         stackmisalignment: longint;
-        para: tparavarsym;
         xmmsize: longint;
         xmmsize: longint;
 
 
       procedure push_one_reg(reg: tregister);
       procedure push_one_reg(reg: tregister);
@@ -196,15 +203,7 @@ unit cgcpu;
                 else
                 else
                   begin
                   begin
                     push_regs;
                     push_regs;
-                    { load framepointer from hidden $parentfp parameter }
-                    para:=tparavarsym(current_procinfo.procdef.paras[0]);
-                    if not (vo_is_parentfp in para.varoptions) then
-                      InternalError(201201142);
-                    if (para.paraloc[calleeside].location^.loc<>LOC_REGISTER) or
-                       (para.paraloc[calleeside].location^.next<>nil) then
-                      InternalError(201201143);
-                    list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],
-                      para.paraloc[calleeside].location^.register,NR_FRAME_POINTER_REG));
+                    gen_load_frame_for_exceptfilter(list);
                     { Need only as much stack space as necessary to do the calls.
                     { Need only as much stack space as necessary to do the calls.
                       Exception filters don't have own local vars, and temps are 'mapped'
                       Exception filters don't have own local vars, and temps are 'mapped'
                       to the parent procedure.
                       to the parent procedure.
@@ -488,7 +487,7 @@ unit cgcpu;
         a_loadaddr_ref_cgpara(list,href,para2);
         a_loadaddr_ref_cgpara(list,href,para2);
         paramanager.freecgpara(list,para2);
         paramanager.freecgpara(list,para2);
         paramanager.freecgpara(list,para1);
         paramanager.freecgpara(list,para1);
-        g_call(current_asmdata.CurrAsmList,'_FPC_local_unwind');
+        g_call(list,'_FPC_local_unwind');
         para2.done;
         para2.done;
         para1.done;
         para1.done;
       end;
       end;

+ 4 - 0
compiler/x86_64/cpubase.inc

@@ -38,6 +38,8 @@ type
     S_YMM
     S_YMM
   );
   );
 
 
+  TOpSizes = set of topsize;
+
 {*****************************************************************************
 {*****************************************************************************
                                   Registers
                                   Registers
 *****************************************************************************}
 *****************************************************************************}
@@ -126,6 +128,8 @@ const
       { these arrays differ between unix and win64 }
       { these arrays differ between unix and win64 }
       saved_standard_registers : array of tsuperregister = nil;
       saved_standard_registers : array of tsuperregister = nil;
       saved_mm_registers : array of tsuperregister = nil;
       saved_mm_registers : array of tsuperregister = nil;
+
+      saved_address_registers : array[0..0] of tsuperregister = (RS_INVALID);
       { Required parameter alignment when calling a routine declared as
       { Required parameter alignment when calling a routine declared as
         stdcall and cdecl. The alignment value should be the one defined
         stdcall and cdecl. The alignment value should be the one defined
         by GCC or the target ABI.
         by GCC or the target ABI.

+ 32 - 4
compiler/x86_64/cpuinfo.pas

@@ -40,7 +40,10 @@ Type
 
 
    tcputype =
    tcputype =
       (cpu_none,
       (cpu_none,
-       cpu_athlon64
+       cpu_athlon64,
+       cpu_core_i,
+       cpu_core_avx,
+       cpu_core_avx2
       );
       );
 
 
    tfputype =
    tfputype =
@@ -77,7 +80,10 @@ Const
    ];
    ];
 
 
    cputypestr : array[tcputype] of string[10] = ('',
    cputypestr : array[tcputype] of string[10] = ('',
-     'ATHLON64'
+     'ATHLON64',
+     'COREI',
+     'COREAVX',
+     'COREAVX2'
    );
    );
 
 
    fputypestr : array[tfputype] of string[6] = ('',
    fputypestr : array[tfputype] of string[6] = ('',
@@ -102,14 +108,36 @@ Const
                                  genericlevel3optimizerswitches-
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_stackframe,
+                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_stackframe,cs_userbp,
 				  cs_opt_tailrecursion,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 				  cs_opt_tailrecursion,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches + 
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches + 
      [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
      [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
-   level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
+   level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [cs_userbp];
+
+type
+   tcpuflags =
+      (CPUX86_HAS_SSEUNIT,
+       CPUX86_HAS_BMI1,
+       CPUX86_HAS_BMI2,
+       CPUX86_HAS_POPCNT,
+       CPUX86_HAS_AVXUNIT,
+       CPUX86_HAS_LZCNT,
+       CPUX86_HAS_MOVBE,
+       CPUX86_HAS_FMA,
+       CPUX86_HAS_FMA4
+      );
+
+ const
+   cpu_capabilities : array[tcputype] of set of tcpuflags = (
+     { cpu_none      } [],
+     { Athlon64      } [CPUX86_HAS_SSEUNIT],
+     { cpu_core_i    } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
+     { cpu_core_avx  } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
+     { cpu_core_avx2 } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
+   );
 
 
 Implementation
 Implementation
 
 

+ 3 - 1
compiler/x86_64/cpunode.pas

@@ -56,7 +56,9 @@ unit cpunode;
        nx64flw,
        nx64flw,
 {$endif DISABLE_WIN64_SEH}
 {$endif DISABLE_WIN64_SEH}
        nx64inl,
        nx64inl,
-       nx64set
+       nx64set,
+       { symtable }
+       symcpu
        ;
        ;
 
 
 end.
 end.

+ 14 - 57
compiler/x86_64/cpupara.pas

@@ -41,7 +41,6 @@ unit cpupara;
           function param_use_paraloc(const cgpara:tcgpara):boolean;override;
           function param_use_paraloc(const cgpara:tcgpara):boolean;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
-          procedure getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
           function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
@@ -521,7 +520,10 @@ unit cpupara;
         num: longint;
         num: longint;
         isbitpacked: boolean;
         isbitpacked: boolean;
       begin
       begin
+        size:=0;
+        bitoffset:=0;
         result:=init_aggregate_classification(def,varspez,byte_offset,words,classes);
         result:=init_aggregate_classification(def,varspez,byte_offset,words,classes);
+
         if (words=0) then
         if (words=0) then
           exit;
           exit;
 
 
@@ -889,62 +891,6 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    procedure tx86_64paramanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
-      var
-        paraloc : pcgparalocation;
-        psym : tparavarsym;
-        pdef : tdef;
-      begin
-        psym:=tparavarsym(pd.paras[nr-1]);
-        pdef:=psym.vardef;
-        if push_addr_param(psym.varspez,pdef,pd.proccalloption) then
-          pdef:=getpointerdef(pdef);
-        cgpara.reset;
-        cgpara.size:=def_cgsize(pdef);
-        cgpara.intsize:=tcgsize2size[cgpara.size];
-        cgpara.alignment:=get_para_align(pd.proccalloption);
-        cgpara.def:=pdef;
-        paraloc:=cgpara.add_location;
-        with paraloc^ do
-         begin
-           size:=def_cgsize(pdef);
-           paraloc^.def:=pdef;
-           if target_info.system=system_x86_64_win64 then
-             begin
-               if nr<1 then
-                 internalerror(200304303)
-               else if nr<=high(paraintsupregs_winx64)+1 then
-                 begin
-                    loc:=LOC_REGISTER;
-                    register:=newreg(R_INTREGISTER,paraintsupregs_winx64[nr-1],cgsize2subreg(R_INTREGISTER,size));
-                 end
-               else
-                 begin
-                    loc:=LOC_REFERENCE;
-                    reference.index:=NR_STACK_POINTER_REG;
-                    reference.offset:=(nr-6)*sizeof(aint);
-                 end;
-             end
-           else
-             begin
-               if nr<1 then
-                 internalerror(200304303)
-               else if nr<=high(paraintsupregs)+1 then
-                 begin
-                    loc:=LOC_REGISTER;
-                    register:=newreg(R_INTREGISTER,paraintsupregs[nr-1],cgsize2subreg(R_INTREGISTER,size));
-                 end
-               else
-                 begin
-                    loc:=LOC_REFERENCE;
-                    reference.index:=NR_STACK_POINTER_REG;
-                    reference.offset:=(nr-6)*sizeof(aint);
-                 end;
-             end;
-          end;
-      end;
-
-
     function tx86_64paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
     function tx86_64paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
       const
       const
         intretregs: array[0..1] of tregister = (NR_FUNCTION_RETURN_REG,NR_FUNCTION_RETURN_REG_HIGH);
         intretregs: array[0..1] of tregister = (NR_FUNCTION_RETURN_REG,NR_FUNCTION_RETURN_REG_HIGH);
@@ -1110,6 +1056,7 @@ unit cpupara;
         i,
         i,
         varalign,
         varalign,
         paraalign  : longint;
         paraalign  : longint;
+        sym: tfieldvarsym;
       begin
       begin
         paraalign:=get_para_align(p.proccalloption);
         paraalign:=get_para_align(p.proccalloption);
         { Register parameters are assigned from left to right }
         { Register parameters are assigned from left to right }
@@ -1117,6 +1064,16 @@ unit cpupara;
           begin
           begin
             hp:=tparavarsym(paras[i]);
             hp:=tparavarsym(paras[i]);
             paradef:=hp.vardef;
             paradef:=hp.vardef;
+            { on win64, if a record has only one field and that field is a
+              single or double, it has to be handled like a single/double }
+            if (target_info.system=system_x86_64_win64) and
+               ((paradef.typ=recorddef) {or
+               is_object(paradef)}) and
+               tabstractrecordsymtable(tabstractrecorddef(paradef).symtable).has_single_field(sym) and
+               (sym.vardef.typ=floatdef) and
+               (tfloatdef(sym.vardef).floattype in [s32real,s64real]) then
+              paradef:=sym.vardef;
+
             pushaddr:=push_addr_param(hp.varspez,paradef,p.proccalloption);
             pushaddr:=push_addr_param(hp.varspez,paradef,p.proccalloption);
             if pushaddr then
             if pushaddr then
               begin
               begin

+ 3 - 0
compiler/x86_64/cputarg.pas

@@ -60,6 +60,9 @@ implementation
     {$ifndef NOAGX86_64ATT}
     {$ifndef NOAGX86_64ATT}
       ,agx86att
       ,agx86att
     {$endif}
     {$endif}
+    {$ifndef NOAGX86_64NSM}
+      ,agx86nsm
+    {$endif}
 
 
       ,ogcoff
       ,ogcoff
       ,ogelf
       ,ogelf

+ 75 - 52
compiler/x86_64/nx64add.pas

@@ -37,10 +37,10 @@ interface
   implementation
   implementation
 
 
     uses
     uses
-      globtype,globals,
+      globtype,globals,verbose,
       aasmbase,aasmtai,aasmdata,
       aasmbase,aasmtai,aasmdata,
       symdef,defutil,
       symdef,defutil,
-      cgbase,cgutils,cga,cgobj,hlcgobj,
+      cgbase,cgutils,cga,cgobj,hlcgobj,cgx86,
       tgobj;
       tgobj;
 
 
 {*****************************************************************************
 {*****************************************************************************
@@ -49,8 +49,10 @@ interface
 
 
     procedure tx8664addnode.second_addordinal;
     procedure tx8664addnode.second_addordinal;
     begin
     begin
-      { filter unsigned MUL opcode, which requires special handling }
+      { filter unsigned MUL opcode, which requires special handling.
+        Note that when overflow checking is off, we can use IMUL instead. }
       if (nodetype=muln) and
       if (nodetype=muln) and
+        (cs_check_overflow in current_settings.localswitches) and
         (not(is_signed(left.resultdef)) or
         (not(is_signed(left.resultdef)) or
          not(is_signed(right.resultdef))) then
          not(is_signed(right.resultdef))) then
       begin
       begin
@@ -66,59 +68,80 @@ interface
 *****************************************************************************}
 *****************************************************************************}
 
 
     procedure tx8664addnode.second_mul;
     procedure tx8664addnode.second_mul;
-
-    var reg:Tregister;
+      var
+        reg,rega,regd:Tregister;
         ref:Treference;
         ref:Treference;
         use_ref:boolean;
         use_ref:boolean;
         hl4 : tasmlabel;
         hl4 : tasmlabel;
-
-    begin
-      pass_left_right;
-
-      { The location.register will be filled in later (JM) }
-      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
-      { Mul supports registers and references, so if not register/reference,
-        load the location into a register}
-      use_ref:=false;
-      if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
-        reg:=left.location.register
-      else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
-        begin
-          ref:=left.location.reference;
-          use_ref:=true;
-        end
-      else
-        begin
-          {LOC_CONSTANT for example.}
-          reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,left.resultdef,osuinttype,left.location,reg);
+        cgsize:TCgSize;
+        opsize:topsize;
+      begin
+        reference_reset(ref,0);
+        reg:=NR_NO;
+
+        cgsize:=def_cgsize(resultdef);
+        opsize:=TCGSize2OpSize[cgsize];
+        case cgsize of
+          OS_S64,OS_64:
+            begin
+              rega:=NR_RAX;
+              regd:=NR_RDX;
+            end;
+          OS_S32,OS_32:
+            begin
+              rega:=NR_EAX;
+              regd:=NR_EDX;
+            end;
+          else
+            internalerror(2013102703);
         end;
         end;
-      { Allocate RAX. }
-      cg.getcpuregister(current_asmdata.CurrAsmList,NR_RAX);
-      { Load the right value. }
-      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_RAX);
-      { Also allocate RDX, since it is also modified by a mul (JM). }
-      cg.getcpuregister(current_asmdata.CurrAsmList,NR_RDX);
-      if use_ref then
-        emit_ref(A_MUL,S_Q,ref)
-      else
-        emit_reg(A_MUL,S_Q,reg);
-      if cs_check_overflow in current_settings.localswitches  then
-       begin
-         current_asmdata.getjumplabel(hl4);
-         cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
-         cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
-         cg.a_label(current_asmdata.CurrAsmList,hl4);
-       end;
-      { Free RDX,RAX }
-      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RDX);
-      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RAX);
-      { Allocate a new register and store the result in RAX in it. }
-      location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-      emit_reg_reg(A_MOV,S_Q,NR_RAX,location.register);
-      location_freetemp(current_asmdata.CurrAsmList,left.location);
-      location_freetemp(current_asmdata.CurrAsmList,right.location);
-    end;
+
+        pass_left_right;
+
+        { The location.register will be filled in later (JM) }
+        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+        { Mul supports registers and references, so if not register/reference,
+          load the location into a register}
+        use_ref:=false;
+        if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
+          reg:=left.location.register
+        else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
+          begin
+            ref:=left.location.reference;
+            use_ref:=true;
+          end
+        else
+          begin
+            {LOC_CONSTANT for example.}
+            reg:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,left.location,reg);
+          end;
+        { Allocate RAX. }
+        cg.getcpuregister(current_asmdata.CurrAsmList,rega);
+        { Load the right value. }
+        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,resultdef,right.location,rega);
+        { Also allocate RDX, since it is also modified by a mul (JM). }
+        cg.getcpuregister(current_asmdata.CurrAsmList,regd);
+        if use_ref then
+          emit_ref(A_MUL,opsize,ref)
+        else
+          emit_reg(A_MUL,opsize,reg);
+        if cs_check_overflow in current_settings.localswitches  then
+         begin
+           current_asmdata.getjumplabel(hl4);
+           cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
+           cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
+           cg.a_label(current_asmdata.CurrAsmList,hl4);
+         end;
+        { Free RDX,RAX }
+        cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
+        cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
+        { Allocate a new register and store the result in RAX in it. }
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+        emit_reg_reg(A_MOV,opsize,rega,location.register);
+        location_freetemp(current_asmdata.CurrAsmList,left.location);
+        location_freetemp(current_asmdata.CurrAsmList,right.location);
+      end;
 
 
 
 
 begin
 begin

+ 44 - 78
compiler/x86_64/nx64flw.pas

@@ -135,41 +135,6 @@ procedure tx64onnode.pass_generate_code;
   end;
   end;
 
 
 { tx64tryfinallynode }
 { tx64tryfinallynode }
-var
-  seq: longint=0;
-
-
-function create_pd: tprocdef;
-  var
-    st:TSymTable;
-    checkstack: psymtablestackitem;
-    sym:tprocsym;
-  begin
-    { get actual procedure symtable (skip withsymtables, etc.) }
-    st:=nil;
-    checkstack:=symtablestack.stack;
-    while assigned(checkstack) do
-      begin
-        st:=checkstack^.symtable;
-          if st.symtabletype in [staticsymtable,globalsymtable,localsymtable] then
-            break;
-          checkstack:=checkstack^.next;
-      end;
-    { Create a nested procedure, even from main_program_level. }
-    result:=tprocdef.create(max(normal_function_level,st.symtablelevel)+1);
-    result.struct:=current_procinfo.procdef.struct;
-    result.proctypeoption:=potype_exceptfilter;
-    handle_calling_convention(result);
-    sym:=tprocsym.create('$fin$'+tostr(seq));
-    st.insert(sym);
-    inc(seq);
-
-    result.procsym:=sym;
-    proc_add_definition(result);
-    result.forwarddef:=false;
-    result.aliasnames.insert(result.mangledname);
-    alloc_proc_symbol(result);
-  end;
 
 
 function reset_regvars(var n: tnode; arg: pointer): foreachnoderesult;
 function reset_regvars(var n: tnode; arg: pointer): foreachnoderesult;
   begin
   begin
@@ -194,47 +159,53 @@ function copy_parasize(var n: tnode; arg: pointer): foreachnoderesult;
 constructor tx64tryfinallynode.create(l, r: TNode);
 constructor tx64tryfinallynode.create(l, r: TNode);
   begin
   begin
     inherited create(l,r);
     inherited create(l,r);
-    if (target_info.system<>system_x86_64_win64) or (
+    if (target_info.system=system_x86_64_win64) and
+       (
       { Don't create child procedures for generic methods, their nested-like
       { Don't create child procedures for generic methods, their nested-like
         behavior causes compilation errors because real nested procedures
         behavior causes compilation errors because real nested procedures
         aren't allowed for generics. Not creating them doesn't harm because
         aren't allowed for generics. Not creating them doesn't harm because
         generic node tree is discarded without generating code. }
         generic node tree is discarded without generating code. }
-        assigned(current_procinfo.procdef.struct) and
-        (df_generic in current_procinfo.procdef.struct.defoptions)
-      ) then
-      exit;
-    finalizepi:=tcgprocinfo(cprocinfo.create(current_procinfo));
-    finalizepi.force_nested;
-    finalizepi.procdef:=create_pd;
-    finalizepi.entrypos:=r.fileinfo;
-    finalizepi.entryswitches:=r.localswitches;
-    finalizepi.exitpos:=current_filepos; // last_endtoken_pos?
-    finalizepi.exitswitches:=current_settings.localswitches;
-    { Regvar optimization for symbols is suppressed when using exceptions, but
-      temps may be still placed into registers. This must be fixed. }
-    foreachnodestatic(r,@reset_regvars,finalizepi);
+        not assigned(current_procinfo.procdef.struct) or
+        not(df_generic in current_procinfo.procdef.struct.defoptions)
+       ) then
+      begin
+        finalizepi:=tcgprocinfo(cprocinfo.create(current_procinfo));
+        finalizepi.force_nested;
+        finalizepi.procdef:=create_finalizer_procdef;
+        finalizepi.entrypos:=r.fileinfo;
+        finalizepi.entryswitches:=r.localswitches;
+        finalizepi.exitpos:=current_filepos; // last_endtoken_pos?
+        finalizepi.exitswitches:=current_settings.localswitches;
+        { the init/final code is messing with asm nodes, so inform the compiler about this }
+        include(finalizepi.flags,pi_has_assembler_block);
+        { Regvar optimization for symbols is suppressed when using exceptions, but
+          temps may be still placed into registers. This must be fixed. }
+        foreachnodestatic(r,@reset_regvars,finalizepi);
+      end;
   end;
   end;
 
 
 constructor tx64tryfinallynode.create_implicit(l, r, _t1: TNode);
 constructor tx64tryfinallynode.create_implicit(l, r, _t1: TNode);
   begin
   begin
     inherited create_implicit(l, r, _t1);
     inherited create_implicit(l, r, _t1);
-    if (target_info.system<>system_x86_64_win64) then
-      exit;
-
-    if assigned(current_procinfo.procdef.struct) and
-      (df_generic in current_procinfo.procdef.struct.defoptions) then
-      InternalError(2013012501);
-
-    finalizepi:=tcgprocinfo(cprocinfo.create(current_procinfo));
-    finalizepi.force_nested;
-    finalizepi.procdef:=create_pd;
-
-    finalizepi.entrypos:=current_filepos;
-    finalizepi.exitpos:=current_filepos; // last_endtoken_pos?
-    finalizepi.entryswitches:=r.localswitches;
-    finalizepi.exitswitches:=current_settings.localswitches;
-    include(finalizepi.flags,pi_do_call);
-    finalizepi.allocate_push_parasize(32);
+    if (target_info.system=system_x86_64_win64) then
+      begin
+        if assigned(current_procinfo.procdef.struct) and
+          (df_generic in current_procinfo.procdef.struct.defoptions) then
+          InternalError(2013012501);
+
+        finalizepi:=tcgprocinfo(cprocinfo.create(current_procinfo));
+        finalizepi.force_nested;
+        finalizepi.procdef:=create_finalizer_procdef;
+
+        finalizepi.entrypos:=current_filepos;
+        finalizepi.exitpos:=current_filepos; // last_endtoken_pos?
+        finalizepi.entryswitches:=r.localswitches;
+        finalizepi.exitswitches:=current_settings.localswitches;
+        include(finalizepi.flags,pi_do_call);
+        { the init/final code is messing with asm nodes, so inform the compiler about this }
+        include(finalizepi.flags,pi_has_assembler_block);
+        finalizepi.allocate_push_parasize(32);
+      end;
   end;
   end;
 
 
 function tx64tryfinallynode.simplify(forinline: boolean): tnode;
 function tx64tryfinallynode.simplify(forinline: boolean): tnode;
@@ -352,22 +323,13 @@ procedure tx64tryfinallynode.pass_generate_code;
       encode everything into a single scope record. }
       encode everything into a single scope record. }
     if catch_frame then
     if catch_frame then
       begin
       begin
-        flowcontrol:=[fc_inflowcontrol];
-        secondpass(t1);
-        { note 1: this is not a 'finally' block, no flow restrictions apply
-          note 2: it contains autogenerated sequential code, flow away is impossible }
-        if flowcontrol<>[fc_inflowcontrol] then
-          CGMessage(cg_e_control_flow_outside_finally);
-        if codegenerror then
-          exit;
-
         if (current_procinfo.procdef.proccalloption=pocall_safecall) then
         if (current_procinfo.procdef.proccalloption=pocall_safecall) then
           begin
           begin
             handle_safecall_exception;
             handle_safecall_exception;
             cg.a_jmp_always(current_asmdata.CurrAsmList,endfinallylabel);
             cg.a_jmp_always(current_asmdata.CurrAsmList,endfinallylabel);
           end
           end
         else
         else
-          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_RERAISE_IMPLICIT',false);
+          InternalError(2014031601);
       end;
       end;
 
 
     flowcontrol:=[fc_inflowcontrol];
     flowcontrol:=[fc_inflowcontrol];
@@ -427,6 +389,10 @@ procedure tx64tryexceptnode.pass_generate_code;
     location_reset(location,LOC_VOID,OS_NO);
     location_reset(location,LOC_VOID,OS_NO);
 
 
     oldflowcontrol:=flowcontrol;
     oldflowcontrol:=flowcontrol;
+    exceptflowcontrol:=[];
+    continueexceptlabel:=nil;
+    breakexceptlabel:=nil;
+
     flowcontrol:=flowcontrol*[fc_unwind]+[fc_inflowcontrol];
     flowcontrol:=flowcontrol*[fc_unwind]+[fc_inflowcontrol];
     { this can be called recursivly }
     { this can be called recursivly }
     oldBreakLabel:=nil;
     oldBreakLabel:=nil;
@@ -495,7 +461,7 @@ procedure tx64tryexceptnode.pass_generate_code;
               InternalError(2011103101);
               InternalError(2011103101);
             { TODO: make it done without using global label }
             { TODO: make it done without using global label }
             current_asmdata.getglobaljumplabel(onlabel);
             current_asmdata.getglobaljumplabel(onlabel);
-            hlist.concat(tai_const.create_rva_sym(current_asmdata.RefAsmSymbol(tonnode(hnode).excepttype.vmt_mangledname)));
+            hlist.concat(tai_const.create_rva_sym(current_asmdata.RefAsmSymbol(tonnode(hnode).excepttype.vmt_mangledname,AT_DATA)));
             hlist.concat(tai_const.create_rva_sym(onlabel));
             hlist.concat(tai_const.create_rva_sym(onlabel));
             cg.a_label(current_asmdata.CurrAsmList,onlabel);
             cg.a_label(current_asmdata.CurrAsmList,onlabel);
             secondpass(hnode);
             secondpass(hnode);

+ 22 - 108
compiler/x86_64/nx64mat.pas

@@ -29,10 +29,6 @@ interface
       node,nmat,ncgmat,nx86mat;
       node,nmat,ncgmat,nx86mat;
 
 
     type
     type
-      tx8664moddivnode = class(tmoddivnode)
-         procedure pass_generate_code;override;
-      end;
-
       tx8664shlshrnode = class(tshlshrnode)
       tx8664shlshrnode = class(tshlshrnode)
          procedure pass_generate_code;override;
          procedure pass_generate_code;override;
       end;
       end;
@@ -55,96 +51,6 @@ implementation
       cgbase,cgutils,cga,cgobj,hlcgobj,cgx86,
       cgbase,cgutils,cga,cgobj,hlcgobj,cgx86,
       ncgutil;
       ncgutil;
 
 
-{*****************************************************************************
-                             TX8664MODDIVNODE
-*****************************************************************************}
-
-    procedure tx8664moddivnode.pass_generate_code;
-      var
-        hreg1,hreg2:Tregister;
-        power:longint;
-        op:Tasmop;
-      begin
-        secondpass(left);
-        if codegenerror then
-          exit;
-        secondpass(right);
-        if codegenerror then
-          exit;
-
-        { put numerator in register }
-        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
-        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
-        hreg1:=left.location.register;
-
-        if (nodetype=divn) and (right.nodetype=ordconstn) and
-           ispowerof2(int64(tordconstnode(right).value),power) then
-          begin
-            { for signed numbers, the numerator must be adjusted before the
-              shift instruction, but not wih unsigned numbers! Otherwise,
-              "Cardinal($ffffffff) div 16" overflows! (JM) }
-            if is_signed(left.resultdef) Then
-              begin
-                  { use a sequence without jumps, saw this in
-                    comp.compilers (JM) }
-                  { no jumps, but more operations }
-                  hreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-                  emit_reg_reg(A_MOV,S_Q,hreg1,hreg2);
-                  {If the left value is signed, hreg2=$ffffffff, otherwise 0.}
-                  emit_const_reg(A_SAR,S_Q,63,hreg2);
-                  {If signed, hreg2=right value-1, otherwise 0.}
-                  { (don't use emit_const_reg, because if value>high(longint)
-                     then it must first be loaded into a register) }
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_S64,tordconstnode(right).value-1,hreg2);
-                  { add to the left value }
-                  emit_reg_reg(A_ADD,S_Q,hreg2,hreg1);
-                  { do the shift }
-                  emit_const_reg(A_SAR,S_Q,power,hreg1);
-              end
-            else
-              emit_const_reg(A_SHR,S_Q,power,hreg1);
-            location.register:=hreg1;
-          end
-        else
-          begin
-            {Bring denominator to a register.}
-            cg.getcpuregister(current_asmdata.CurrAsmList,NR_RAX);
-            emit_reg_reg(A_MOV,S_Q,hreg1,NR_RAX);
-            cg.getcpuregister(current_asmdata.CurrAsmList,NR_RDX);
-            {Sign extension depends on the left type.}
-            if torddef(left.resultdef).ordtype=u64bit then
-              emit_reg_reg(A_XOR,S_Q,NR_RDX,NR_RDX)
-            else
-              emit_none(A_CQO,S_NO);
-
-            {Division depends on the right type.}
-            if Torddef(right.resultdef).ordtype=u64bit then
-              op:=A_DIV
-            else
-              op:=A_IDIV;
-
-            if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
-              emit_ref(op,S_Q,right.location.reference)
-            else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
-              emit_reg(op,S_Q,right.location.register)
-            else
-              begin
-                hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size);
-                hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,u64inttype,right.location,hreg1);
-                emit_reg(op,S_Q,hreg1);
-              end;
-
-            { Copy the result into a new register. Release RAX & RDX.}
-            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RDX);
-            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RAX);
-            location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-            if nodetype=divn then
-              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_RAX,location.register)
-            else
-              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_RDX,location.register);
-          end;
-      end;
-
 
 
 {*****************************************************************************
 {*****************************************************************************
                              TX8664SHLRSHRNODE
                              TX8664SHLRSHRNODE
@@ -153,7 +59,7 @@ implementation
 
 
     procedure tx8664shlshrnode.pass_generate_code;
     procedure tx8664shlshrnode.pass_generate_code;
       var
       var
-        op : Tasmop;
+        op : topcg;
         opsize : tcgsize;
         opsize : tcgsize;
         mask : aint;
         mask : aint;
       begin
       begin
@@ -162,9 +68,9 @@ implementation
 
 
         { determine operator }
         { determine operator }
         if nodetype=shln then
         if nodetype=shln then
-          op:=A_SHL
+          op:=OP_SHL
         else
         else
-          op:=A_SHR;
+          op:=OP_SHR;
 
 
         { special treatment of 32bit values for backwards compatibility }
         { special treatment of 32bit values for backwards compatibility }
         { mul optimizations require to keep the sign (FK) }
         { mul optimizations require to keep the sign (FK) }
@@ -186,28 +92,36 @@ implementation
           end;
           end;
 
 
         { load left operators in a register }
         { load left operators in a register }
-        location_copy(location,left.location);
-        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,left.resultdef,cgsize_orddef(opsize),false);
+        if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER]) or
+          { location_force_reg can be also used to change the size of a register }
+          (left.location.size<>opsize) then
+          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);
+        location_reset(location,LOC_REGISTER,opsize);
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
 
 
         { shifting by a constant directly coded: }
         { shifting by a constant directly coded: }
         if (right.nodetype=ordconstn) then
         if (right.nodetype=ordconstn) then
-          emit_const_reg(op,tcgsize2opsize[opsize],tordconstnode(right).value and mask,location.register)
+          cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,op,location.size,
+            tordconstnode(right).value.uvalue and mask,left.location.register,location.register)
         else
         else
           begin
           begin
-            { load right operators in a RCX }
-            cg.getcpuregister(current_asmdata.CurrAsmList,NR_RCX);
-            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_RCX);
-
-            { right operand is in ECX }
-            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RCX);
-            emit_reg_reg(op,tcgsize2opsize[opsize],NR_CL,location.register);
+            { load right operators in a register - this
+              is done since most target cpu which will use this
+              node do not support a shift count in a mem. location (cec)
+            }
+            if not(right.location.loc in [LOC_CREGISTER,LOC_REGISTER]) or
+               { location_force_reg can be also used to change the size of a register }
+              (right.location.size<>opsize) then
+              hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,cgsize_orddef(opsize),true);
+
+            cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,op,opsize,right.location.register,left.location.register,location.register);
           end;
           end;
       end;
       end;
 
 
 
 
 begin
 begin
    cunaryminusnode:=tx8664unaryminusnode;
    cunaryminusnode:=tx8664unaryminusnode;
-   cmoddivnode:=tx8664moddivnode;
+   cmoddivnode:=tx86moddivnode;
    cshlshrnode:=tx8664shlshrnode;
    cshlshrnode:=tx8664shlshrnode;
    cnotnode:=tx8664notnode;
    cnotnode:=tx8664notnode;
 end.
 end.

+ 111 - 2
compiler/x86_64/nx64set.pas

@@ -1,7 +1,7 @@
 {
 {
     Copyright (c) 1998-2002 by Florian Klaempfl
     Copyright (c) 1998-2002 by Florian Klaempfl
 
 
-    Generate i386 assembler for in set/case nodes
+    Generate x86_64 assembler for in set/case nodes
 
 
     This program is free software; you can redistribute it and/or modify
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     it under the terms of the GNU General Public License as published by
@@ -32,13 +32,26 @@ interface
     type
     type
       tx8664casenode = class(tx86casenode)
       tx8664casenode = class(tx86casenode)
          procedure optimizevalues(var max_linear_list:aint;var max_dist:aword);override;
          procedure optimizevalues(var max_linear_list:aint;var max_dist:aword);override;
+         procedure genjumptable(hp : pcaselabel;min_,max_ : aint);override;
       end;
       end;
 
 
 
 
 implementation
 implementation
 
 
+    uses
+      systems,
+      verbose,globals,constexp,
+      symconst,symdef,defutil,
+      aasmbase,aasmtai,aasmdata,aasmcpu,
+      cgbase,pass_2,
+      ncon,
+      cpubase,cpuinfo,procinfo,
+      cga,cgutils,cgobj,ncgutil,
+      cgx86;
+
+
 {*****************************************************************************
 {*****************************************************************************
-                            TI386CASENODE
+                            TX8664CASENODE
 *****************************************************************************}
 *****************************************************************************}
 
 
     procedure tx8664casenode.optimizevalues(var max_linear_list:aint;var max_dist:aword);
     procedure tx8664casenode.optimizevalues(var max_linear_list:aint;var max_dist:aword);
@@ -46,6 +59,102 @@ implementation
         inc(max_linear_list,9);
         inc(max_linear_list,9);
       end;
       end;
 
 
+
+    { Always generate position-independent jump table, it is twice less in size at a price
+      of two extra instructions (which shouldn't cause more slowdown than pipeline trashing) }
+    procedure tx8664casenode.genjumptable(hp : pcaselabel; min_,max_ : aint);
+      var
+        last: TConstExprInt;
+        tablelabel: TAsmLabel;
+        basereg,indexreg,jumpreg: TRegister;
+        href: TReference;
+        opcgsize: tcgsize;
+        sectype: TAsmSectiontype;
+        jtitemconsttype: taiconst_type;
+
+      procedure genitem(list:TAsmList;t : pcaselabel);
+        var
+          i : aint;
+        begin
+          if assigned(t^.less) then
+            genitem(list,t^.less);
+          { fill possible hole }
+          i:=last.svalue+1;
+          while i<=t^._low.svalue-1 do
+            begin
+              list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,elselabel));
+              inc(i);
+            end;
+          i:=t^._low.svalue;
+          while i<=t^._high.svalue do
+            begin
+              list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,blocklabel(t^.blockid)));
+              inc(i);
+            end;
+          last:=t^._high;
+          if assigned(t^.greater) then
+            genitem(list,t^.greater);
+        end;
+
+      begin
+        if not(target_info.system in systems_darwin) then
+          jtitemconsttype:=aitconst_32bit
+        else
+          { see https://gmplib.org/list-archives/gmp-bugs/2012-December/002836.html }
+          jtitemconsttype:=aitconst_darwin_dwarf_delta32;
+
+        last:=min_;
+        opcgsize:=def_cgsize(opsize);
+        if not(jumptable_no_range) then
+          begin
+             { a <= x <= b <-> unsigned(x-a) <= (b-a) }
+             cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SUB,opcgsize,aint(min_),hregister);
+             { case expr greater than max_ => goto elselabel }
+             cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,OC_A,aint(max_)-aint(min_),hregister,elselabel);
+             min_:=0;
+          end;
+        { local label in order to avoid using GOT }
+        current_asmdata.getlabel(tablelabel,alt_data);
+        indexreg:=cg.makeregsize(current_asmdata.CurrAsmList,hregister,OS_ADDR);
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList,opcgsize,OS_ADDR,hregister,indexreg);
+        { load table address }
+        reference_reset_symbol(href,tablelabel,0,4);
+        basereg:=cg.getaddressregister(current_asmdata.CurrAsmList);
+        cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,basereg);
+        { load table slot, 32-bit sign extended }
+        reference_reset_base(href,basereg,-aint(min_)*4,4);
+        href.index:=indexreg;
+        href.scalefactor:=4;
+        jumpreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
+        cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_S32,OS_ADDR,href,jumpreg);
+        { add table address }
+        reference_reset_base(href,basereg,0,sizeof(pint));
+        href.index:=jumpreg;
+        href.scalefactor:=1;
+        cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,jumpreg);
+        { and finally jump }
+        emit_reg(A_JMP,S_NO,jumpreg);
+        { generate jump table }
+        if not(target_info.system in systems_darwin) then
+          sectype:=sec_rodata
+        else
+          { on Mac OS X, dead code stripping ("smart linking") happens based on
+            global symbols: every global/static symbol (symbols that do not
+            start with "L") marks the start of a new "subsection" that is
+            discarded by the linker if there are no references to this symbol.
+            This means that if you put the jump table in the rodata section, it
+            will become part of the block of data associated with the previous
+            non-L-label in the rodata section and stay or be thrown away
+            depending on whether that block of data is referenced. Therefore,
+            jump tables must be added in the code section and since aktlocaldata
+            is inserted right after the routine, it will become part of the
+            same subsection that contains the routine's code }
+          sectype:=sec_code;
+        new_section(current_procinfo.aktlocaldata,sectype,current_procinfo.procdef.mangledname,4);
+        current_procinfo.aktlocaldata.concat(Tai_label.Create(tablelabel));
+        genitem(current_procinfo.aktlocaldata,hp);
+      end;
+
 begin
 begin
    ccasenode:=tx8664casenode;
    ccasenode:=tx8664casenode;
 end.
 end.

+ 10 - 10
compiler/x86_64/r8664ari.inc

@@ -9,22 +9,22 @@
 18,
 18,
 7,
 7,
 6,
 6,
-83,
 84,
 84,
 85,
 85,
 86,
 86,
-71,
+87,
+70,
 8,
 8,
 12,
 12,
 26,
 26,
 25,
 25,
 11,
 11,
-77,
 78,
 78,
 79,
 79,
 80,
 80,
 81,
 81,
 82,
 82,
+83,
 72,
 72,
 13,
 13,
 4,
 4,
@@ -33,13 +33,13 @@
 9,
 9,
 27,
 27,
 14,
 14,
-70,
-73,
+77,
+69,
 23,
 23,
 35,
 35,
-92,
 75,
 75,
-76,
+73,
+74,
 102,
 102,
 103,
 103,
 104,
 104,
@@ -86,14 +86,14 @@
 10,
 10,
 28,
 28,
 15,
 15,
-69,
+76,
 24,
 24,
 36,
 36,
 22,
 22,
 21,
 21,
 34,
 34,
 33,
 33,
-74,
+71,
 101,
 101,
 93,
 93,
 94,
 94,
@@ -103,11 +103,11 @@
 98,
 98,
 99,
 99,
 100,
 100,
-87,
 88,
 88,
 89,
 89,
 90,
 90,
 91,
 91,
+92,
 110,
 110,
 111,
 111,
 120,
 120,

+ 5 - 5
compiler/x86_64/r8664att.inc

@@ -68,14 +68,15 @@
 '%r15b',
 '%r15b',
 '%r15w',
 '%r15w',
 '%r15d',
 '%r15d',
-'%rip',
-'%eip',
-'%cs',
-'%ds',
 '%es',
 '%es',
+'%cs',
 '%ss',
 '%ss',
+'%ds',
 '%fs',
 '%fs',
 '%gs',
 '%gs',
+'%flags',
+'%rip',
+'%eip',
 '%dr0',
 '%dr0',
 '%dr1',
 '%dr1',
 '%dr2',
 '%dr2',
@@ -91,7 +92,6 @@
 '%tr5',
 '%tr5',
 '%tr6',
 '%tr6',
 '%tr7',
 '%tr7',
-'%flags',
 '%st(0)',
 '%st(0)',
 '%st(1)',
 '%st(1)',
 '%st(2)',
 '%st(2)',

+ 23 - 23
compiler/x86_64/r8664con.inc

@@ -68,30 +68,30 @@ NR_R15 = tregister($0105000f);
 NR_R15L = tregister($0101000f);
 NR_R15L = tregister($0101000f);
 NR_R15W = tregister($0103000f);
 NR_R15W = tregister($0103000f);
 NR_R15D = tregister($0104000f);
 NR_R15D = tregister($0104000f);
-NR_RIP = tregister($05050000);
-NR_EIP = tregister($05040000);
+NR_ES = tregister($05000000);
 NR_CS = tregister($05000001);
 NR_CS = tregister($05000001);
-NR_DS = tregister($05000002);
-NR_ES = tregister($05000003);
-NR_SS = tregister($05000004);
-NR_FS = tregister($05000005);
-NR_GS = tregister($05000006);
-NR_DR0 = tregister($05000007);
-NR_DR1 = tregister($05000008);
-NR_DR2 = tregister($05000009);
-NR_DR3 = tregister($0500000a);
-NR_DR6 = tregister($0500000b);
-NR_DR7 = tregister($0500000c);
-NR_CR0 = tregister($0500000d);
-NR_CR2 = tregister($0500000e);
-NR_CR3 = tregister($0500000f);
-NR_CR4 = tregister($05000010);
-NR_TR3 = tregister($05000011);
-NR_TR4 = tregister($05000012);
-NR_TR5 = tregister($05000013);
-NR_TR6 = tregister($05000014);
-NR_TR7 = tregister($05000015);
-NR_FLAGS = tregister($05000016);
+NR_SS = tregister($05000002);
+NR_DS = tregister($05000003);
+NR_FS = tregister($05000004);
+NR_GS = tregister($05000005);
+NR_FLAGS = tregister($05000006);
+NR_RIP = tregister($05050007);
+NR_EIP = tregister($05040007);
+NR_DR0 = tregister($05000008);
+NR_DR1 = tregister($05000009);
+NR_DR2 = tregister($0500000a);
+NR_DR3 = tregister($0500000b);
+NR_DR6 = tregister($0500000d);
+NR_DR7 = tregister($0500000e);
+NR_CR0 = tregister($05000010);
+NR_CR2 = tregister($05000012);
+NR_CR3 = tregister($05000013);
+NR_CR4 = tregister($05000014);
+NR_TR3 = tregister($0500001b);
+NR_TR4 = tregister($0500001c);
+NR_TR5 = tregister($0500001d);
+NR_TR6 = tregister($0500001e);
+NR_TR7 = tregister($0500001f);
 NR_ST0 = tregister($02000000);
 NR_ST0 = tregister($02000000);
 NR_ST1 = tregister($02000001);
 NR_ST1 = tregister($02000001);
 NR_ST2 = tregister($02000002);
 NR_ST2 = tregister($02000002);

+ 2 - 2
compiler/x86_64/r8664dwrf.inc

@@ -68,8 +68,6 @@
 15,
 15,
 15,
 15,
 15,
 15,
-16,
-16,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
@@ -77,6 +75,8 @@
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
+16,
+16,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,

+ 5 - 5
compiler/x86_64/r8664int.inc

@@ -68,14 +68,15 @@
 'r15b',
 'r15b',
 'r15w',
 'r15w',
 'r15d',
 'r15d',
-'rip',
-'eip',
-'cs',
-'ds',
 'es',
 'es',
+'cs',
 'ss',
 'ss',
+'ds',
 'fs',
 'fs',
 'gs',
 'gs',
+'flags',
+'rip',
+'eip',
 'dr0',
 'dr0',
 'dr1',
 'dr1',
 'dr2',
 'dr2',
@@ -91,7 +92,6 @@
 'tr5',
 'tr5',
 'tr6',
 'tr6',
 'tr7',
 'tr7',
-'flags',
 'st(0)',
 'st(0)',
 'st(1)',
 'st(1)',
 'st(2)',
 'st(2)',

+ 10 - 10
compiler/x86_64/r8664iri.inc

@@ -10,22 +10,22 @@
 18,
 18,
 7,
 7,
 6,
 6,
-83,
 84,
 84,
 85,
 85,
 86,
 86,
-71,
+87,
+70,
 8,
 8,
 12,
 12,
 26,
 26,
 25,
 25,
 11,
 11,
-77,
 78,
 78,
 79,
 79,
 80,
 80,
 81,
 81,
 82,
 82,
+83,
 72,
 72,
 13,
 13,
 4,
 4,
@@ -34,13 +34,13 @@
 9,
 9,
 27,
 27,
 14,
 14,
-70,
-73,
+77,
+69,
 23,
 23,
 35,
 35,
-92,
 75,
 75,
-76,
+73,
+74,
 102,
 102,
 103,
 103,
 104,
 104,
@@ -87,14 +87,14 @@
 10,
 10,
 28,
 28,
 15,
 15,
-69,
+76,
 24,
 24,
 36,
 36,
 22,
 22,
 21,
 21,
 34,
 34,
 33,
 33,
-74,
+71,
 101,
 101,
 93,
 93,
 94,
 94,
@@ -104,11 +104,11 @@
 98,
 98,
 99,
 99,
 100,
 100,
-87,
 88,
 88,
 89,
 89,
 90,
 90,
 91,
 91,
+92,
 110,
 110,
 111,
 111,
 120,
 120,

+ 8 - 8
compiler/x86_64/r8664num.inc

@@ -68,30 +68,30 @@ tregister($0105000f),
 tregister($0101000f),
 tregister($0101000f),
 tregister($0103000f),
 tregister($0103000f),
 tregister($0104000f),
 tregister($0104000f),
-tregister($05050000),
-tregister($05040000),
+tregister($05000000),
 tregister($05000001),
 tregister($05000001),
 tregister($05000002),
 tregister($05000002),
 tregister($05000003),
 tregister($05000003),
 tregister($05000004),
 tregister($05000004),
 tregister($05000005),
 tregister($05000005),
 tregister($05000006),
 tregister($05000006),
-tregister($05000007),
+tregister($05050007),
+tregister($05040007),
 tregister($05000008),
 tregister($05000008),
 tregister($05000009),
 tregister($05000009),
 tregister($0500000a),
 tregister($0500000a),
 tregister($0500000b),
 tregister($0500000b),
-tregister($0500000c),
 tregister($0500000d),
 tregister($0500000d),
 tregister($0500000e),
 tregister($0500000e),
-tregister($0500000f),
 tregister($05000010),
 tregister($05000010),
-tregister($05000011),
 tregister($05000012),
 tregister($05000012),
 tregister($05000013),
 tregister($05000013),
 tregister($05000014),
 tregister($05000014),
-tregister($05000015),
-tregister($05000016),
+tregister($0500001b),
+tregister($0500001c),
+tregister($0500001d),
+tregister($0500001e),
+tregister($0500001f),
 tregister($02000000),
 tregister($02000000),
 tregister($02000001),
 tregister($02000001),
 tregister($02000002),
 tregister($02000002),

+ 0 - 143
compiler/x86_64/r8664op.inc

@@ -1,143 +0,0 @@
-{ don't edit, this file is generated from x86reg.dat }
-0,
-0,
-4,
-0,
-0,
-0,
-1,
-5,
-1,
-1,
-1,
-2,
-6,
-2,
-2,
-2,
-3,
-7,
-3,
-3,
-3,
-6,
-6,
-6,
-6,
-7,
-7,
-7,
-7,
-5,
-5,
-5,
-5,
-4,
-4,
-4,
-4,
-0,
-0,
-0,
-0,
-1,
-1,
-1,
-1,
-2,
-2,
-2,
-2,
-3,
-3,
-3,
-3,
-4,
-4,
-4,
-4,
-5,
-5,
-5,
-5,
-6,
-6,
-6,
-6,
-7,
-7,
-7,
-7,
-0,
-0,
-1,
-3,
-0,
-2,
-4,
-5,
-0,
-1,
-2,
-3,
-6,
-7,
-0,
-2,
-3,
-4,
-3,
-4,
-5,
-6,
-7,
-0,
-0,
-1,
-2,
-3,
-4,
-5,
-6,
-7,
-0,
-0,
-1,
-2,
-3,
-4,
-5,
-6,
-7,
-0,
-1,
-2,
-3,
-4,
-5,
-6,
-7,
-0,
-1,
-2,
-3,
-4,
-5,
-6,
-7,
-0,
-1,
-2,
-3,
-4,
-5,
-6,
-7,
-0,
-1,
-2,
-3,
-4,
-5,
-6,
-7

+ 4 - 4
compiler/x86_64/r8664ot.inc

@@ -68,14 +68,15 @@ OT_REG64,
 OT_REG8,
 OT_REG8,
 OT_REG16,
 OT_REG16,
 OT_REG32,
 OT_REG32,
-OT_NONE,
-OT_NONE,
-OT_REG_CS,
 OT_REG_DESS,
 OT_REG_DESS,
+OT_REG_CS,
 OT_REG_DESS,
 OT_REG_DESS,
 OT_REG_DESS,
 OT_REG_DESS,
 OT_REG_FSGS,
 OT_REG_FSGS,
 OT_REG_FSGS,
 OT_REG_FSGS,
+OT_NONE,
+OT_NONE,
+OT_NONE,
 OT_REG_DREG,
 OT_REG_DREG,
 OT_REG_DREG,
 OT_REG_DREG,
 OT_REG_DREG,
 OT_REG_DREG,
@@ -91,7 +92,6 @@ OT_REG_TREG,
 OT_REG_TREG,
 OT_REG_TREG,
 OT_REG_TREG,
 OT_REG_TREG,
 OT_REG_TREG,
 OT_REG_TREG,
-OT_NONE,
 OT_FPU0,
 OT_FPU0,
 OT_FPUREG,
 OT_FPUREG,
 OT_FPUREG,
 OT_FPUREG,

+ 4 - 4
compiler/x86_64/r8664rni.inc

@@ -117,13 +117,13 @@
 139,
 139,
 140,
 140,
 141,
 141,
+69,
+70,
 71,
 71,
 72,
 72,
 73,
 73,
 74,
 74,
 75,
 75,
-76,
-77,
 78,
 78,
 79,
 79,
 80,
 80,
@@ -139,5 +139,5 @@
 90,
 90,
 91,
 91,
 92,
 92,
-70,
-69
+77,
+76

+ 10 - 10
compiler/x86_64/r8664sri.inc

@@ -10,22 +10,22 @@
 18,
 18,
 7,
 7,
 6,
 6,
-83,
 84,
 84,
 85,
 85,
 86,
 86,
-71,
+87,
+70,
 8,
 8,
 12,
 12,
 26,
 26,
 25,
 25,
 11,
 11,
-77,
 78,
 78,
 79,
 79,
 80,
 80,
 81,
 81,
 82,
 82,
+83,
 72,
 72,
 13,
 13,
 4,
 4,
@@ -34,13 +34,13 @@
 9,
 9,
 27,
 27,
 14,
 14,
-70,
-73,
+77,
+69,
 23,
 23,
 35,
 35,
-92,
 75,
 75,
-76,
+73,
+74,
 102,
 102,
 103,
 103,
 104,
 104,
@@ -87,14 +87,14 @@
 10,
 10,
 28,
 28,
 15,
 15,
-69,
+76,
 24,
 24,
 36,
 36,
 22,
 22,
 21,
 21,
 34,
 34,
 33,
 33,
-74,
+71,
 101,
 101,
 93,
 93,
 94,
 94,
@@ -104,11 +104,11 @@
 98,
 98,
 99,
 99,
 100,
 100,
-87,
 88,
 88,
 89,
 89,
 90,
 90,
 91,
 91,
+92,
 110,
 110,
 111,
 111,
 120,
 120,

+ 2 - 2
compiler/x86_64/r8664stab.inc

@@ -68,8 +68,6 @@
 15,
 15,
 15,
 15,
 15,
 15,
-16,
-16,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
@@ -77,6 +75,8 @@
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
+16,
+16,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,

+ 5 - 5
compiler/x86_64/r8664std.inc

@@ -68,14 +68,15 @@
 'r15b',
 'r15b',
 'r15w',
 'r15w',
 'r15d',
 'r15d',
-'rip',
-'eip',
-'cs',
-'ds',
 'es',
 'es',
+'cs',
 'ss',
 'ss',
+'ds',
 'fs',
 'fs',
 'gs',
 'gs',
+'flags',
+'rip',
+'eip',
 'dr0',
 'dr0',
 'dr1',
 'dr1',
 'dr2',
 'dr2',
@@ -91,7 +92,6 @@
 'tr5',
 'tr5',
 'tr6',
 'tr6',
 'tr7',
 'tr7',
-'flags',
 'st(0)',
 'st(0)',
 'st(1)',
 'st(1)',
 'st(2)',
 'st(2)',

+ 1 - 1
compiler/x86_64/rgcpu.pas

@@ -47,7 +47,7 @@ unit rgcpu;
         supreg:=getsupreg(reg);
         supreg:=getsupreg(reg);
         { All registers conflict with rsp/rbp }
         { All registers conflict with rsp/rbp }
         add_edge(supreg,RS_RSP);
         add_edge(supreg,RS_RSP);
-        add_edge(supreg,RS_RBP);
+        // add_edge(supreg,RS_RBP);
       end;
       end;
 
 
 end.
 end.

+ 211 - 0
compiler/x86_64/symcpu.pas

@@ -0,0 +1,211 @@
+{
+    Copyright (c) 2014 by Florian Klaempfl
+
+    Symbol table overrides for x86_64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit symcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  symtype,symdef,symsym,symx86;
+
+type
+  { defs }
+  tcpufiledef = class(tfiledef)
+  end;
+  tcpufiledefclass = class of tcpufiledef;
+
+  tcpuvariantdef = class(tvariantdef)
+  end;
+  tcpuvariantdefclass = class of tcpuvariantdef;
+
+  tcpuformaldef = class(tformaldef)
+  end;
+  tcpuformaldefclass = class of tcpuformaldef;
+
+  tcpuforwarddef = class(tforwarddef)
+  end;
+  tcpuforwarddefclass = class of tcpuforwarddef;
+
+  tcpuundefineddef = class(tundefineddef)
+  end;
+  tcpuundefineddefclass = class of tcpuundefineddef;
+
+  tcpuerrordef = class(terrordef)
+  end;
+  tcpuerrordefclass = class of tcpuerrordef;
+
+  tcpupointerdef = class(tx86pointerdef)
+  end;
+  tcpupointerdefclass = class of tcpupointerdef;
+
+  tcpurecorddef = class(trecorddef)
+  end;
+  tcpurecorddefclass = class of tcpurecorddef;
+
+  tcpuimplementedinterface = class(timplementedinterface)
+  end;
+  tcpuimplementedinterfaceclass = class of tcpuimplementedinterface;
+
+  tcpuobjectdef = class(tobjectdef)
+  end;
+  tcpuobjectdefclass = class of tcpuobjectdef;
+
+  tcpuclassrefdef = class(tclassrefdef)
+  end;
+  tcpuclassrefdefclass = class of tcpuclassrefdef;
+
+  tcpuarraydef = class(tarraydef)
+  end;
+  tcpuarraydefclass = class of tcpuarraydef;
+
+  tcpuorddef = class(torddef)
+  end;
+  tcpuorddefclass = class of tcpuorddef;
+
+  tcpufloatdef = class(tfloatdef)
+  end;
+  tcpufloatdefclass = class of tcpufloatdef;
+
+  tcpuprocvardef = class(tprocvardef)
+  end;
+  tcpuprocvardefclass = class of tcpuprocvardef;
+
+  tcpuprocdef = class(tprocdef)
+  end;
+  tcpuprocdefclass = class of tcpuprocdef;
+
+  tcpustringdef = class(tstringdef)
+  end;
+  tcpustringdefclass = class of tcpustringdef;
+
+  tcpuenumdef = class(tenumdef)
+  end;
+  tcpuenumdefclass = class of tcpuenumdef;
+
+  tcpusetdef = class(tsetdef)
+  end;
+  tcpusetdefclass = class of tcpusetdef;
+
+  { syms }
+  tcpulabelsym = class(tlabelsym)
+  end;
+  tcpulabelsymclass = class of tcpulabelsym;
+
+  tcpuunitsym = class(tunitsym)
+  end;
+  tcpuunitsymclass = class of tcpuunitsym;
+
+  tcpunamespacesym = class(tnamespacesym)
+  end;
+  tcpunamespacesymclass = class of tcpunamespacesym;
+
+  tcpuprocsym = class(tprocsym)
+  end;
+  tcpuprocsymclass = class of tcpuprocsym;
+
+  tcputypesym = class(ttypesym)
+  end;
+  tcpuypesymclass = class of tcputypesym;
+
+  tcpufieldvarsym = class(tfieldvarsym)
+  end;
+  tcpufieldvarsymclass = class of tcpufieldvarsym;
+
+  tcpulocalvarsym = class(tlocalvarsym)
+  end;
+  tcpulocalvarsymclass = class of tcpulocalvarsym;
+
+  tcpuparavarsym = class(tparavarsym)
+  end;
+  tcpuparavarsymclass = class of tcpuparavarsym;
+
+  tcpustaticvarsym = class(tstaticvarsym)
+  end;
+  tcpustaticvarsymclass = class of tcpustaticvarsym;
+
+  tcpuabsolutevarsym = class(tabsolutevarsym)
+  end;
+  tcpuabsolutevarsymclass = class of tcpuabsolutevarsym;
+
+  tcpupropertysym = class(tpropertysym)
+  end;
+  tcpupropertysymclass = class of tcpupropertysym;
+
+  tcpuconstsym = class(tconstsym)
+  end;
+  tcpuconstsymclass = class of tcpuconstsym;
+
+  tcpuenumsym = class(tenumsym)
+  end;
+  tcpuenumsymclass = class of tcpuenumsym;
+
+  tcpusyssym = class(tsyssym)
+  end;
+  tcpusyssymclass = class of tcpusyssym;
+
+
+const
+  pbestrealtype : ^tdef = @s80floattype;
+
+
+implementation
+
+begin
+  { used tdef classes }
+  cfiledef:=tcpufiledef;
+  cvariantdef:=tcpuvariantdef;
+  cformaldef:=tcpuformaldef;
+  cforwarddef:=tcpuforwarddef;
+  cundefineddef:=tcpuundefineddef;
+  cerrordef:=tcpuerrordef;
+  cpointerdef:=tcpupointerdef;
+  crecorddef:=tcpurecorddef;
+  cimplementedinterface:=tcpuimplementedinterface;
+  cobjectdef:=tcpuobjectdef;
+  cclassrefdef:=tcpuclassrefdef;
+  carraydef:=tcpuarraydef;
+  corddef:=tcpuorddef;
+  cfloatdef:=tcpufloatdef;
+  cprocvardef:=tcpuprocvardef;
+  cprocdef:=tcpuprocdef;
+  cstringdef:=tcpustringdef;
+  cenumdef:=tcpuenumdef;
+  csetdef:=tcpusetdef;
+
+  { used tsym classes }
+  clabelsym:=tcpulabelsym;
+  cunitsym:=tcpuunitsym;
+  cnamespacesym:=tcpunamespacesym;
+  cprocsym:=tcpuprocsym;
+  ctypesym:=tcputypesym;
+  cfieldvarsym:=tcpufieldvarsym;
+  clocalvarsym:=tcpulocalvarsym;
+  cparavarsym:=tcpuparavarsym;
+  cstaticvarsym:=tcpustaticvarsym;
+  cabsolutevarsym:=tcpuabsolutevarsym;
+  cpropertysym:=tcpupropertysym;
+  cconstsym:=tcpuconstsym;
+  cenumsym:=tcpuenumsym;
+  csyssym:=tcpusyssym;
+end.
+

+ 70 - 21
compiler/x86_64/x8664ats.inc

@@ -1,12 +1,6 @@
 { don't edit, this file is generated from x86ins.dat }
 { don't edit, this file is generated from x86ins.dat }
 (
 (
 attsufNONE,
 attsufNONE,
-attsufNONE,
-attsufINT,
-attsufINT,
-attsufNONE,
-attsufINT,
-attsufINT,
 attsufINT,
 attsufINT,
 attsufINT,
 attsufINT,
 attsufINT,
 attsufINT,
@@ -35,8 +29,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
-attsufNONE,
-attsufNONE,
 attsufINT,
 attsufINT,
 attsufINT,
 attsufINT,
 attsufNONE,
 attsufNONE,
@@ -156,7 +148,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
-attsufNONE,
 attsufINT,
 attsufINT,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
@@ -164,19 +155,16 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
-attsufNONE,
 attsufINT,
 attsufINT,
 attsufNONE,
 attsufNONE,
 attsufINT,
 attsufINT,
 attsufNONE,
 attsufNONE,
 attsufINT,
 attsufINT,
-attsufINT,
 attsufNONE,
 attsufNONE,
 attsufINT,
 attsufINT,
 attsufINT,
 attsufINT,
 attsufINT,
 attsufINT,
 attsufINT,
 attsufINT,
-attsufINT,
 attsufNONE,
 attsufNONE,
 attsufINT,
 attsufINT,
 attsufINT,
 attsufINT,
@@ -266,10 +254,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufINT,
 attsufINT,
-attsufINT,
-attsufNONE,
-attsufNONE,
-attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
@@ -300,10 +284,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufINT,
 attsufINT,
-attsufINT,
-attsufNONE,
-attsufNONE,
-attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
@@ -329,7 +309,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufINT,
 attsufINT,
-attsufNONE,
 attsufINT,
 attsufINT,
 attsufINT,
 attsufINT,
 attsufNONE,
 attsufNONE,
@@ -969,5 +948,75 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 attsufNONE
 );
 );

+ 71 - 22
compiler/x86_64/x8664att.inc

@@ -1,15 +1,9 @@
 { don't edit, this file is generated from x86ins.dat }
 { don't edit, this file is generated from x86ins.dat }
 (
 (
 'none',
 'none',
-'aaa',
-'aad',
-'aam',
-'aas',
 'adc',
 'adc',
 'add',
 'add',
 'and',
 'and',
-'arpl',
-'bound',
 'bsf',
 'bsf',
 'bsr',
 'bsr',
 'bswap',
 'bswap',
@@ -35,8 +29,6 @@
 'cpuid',
 'cpuid',
 'cwd',
 'cwd',
 'cwtl',
 'cwtl',
-'daa',
-'das',
 'dec',
 'dec',
 'div',
 'div',
 'emms',
 'emms',
@@ -155,24 +147,20 @@
 'int1',
 'int1',
 'int03',
 'int03',
 'int3',
 'int3',
-'into',
 'invd',
 'invd',
 'invlpg',
 'invlpg',
 'iret',
 'iret',
 'iret',
 'iret',
 'iretw',
 'iretw',
 'iretq',
 'iretq',
-'jcxz',
 'jecxz',
 'jecxz',
 'jrcxz',
 'jrcxz',
 'jmp',
 'jmp',
 'lahf',
 'lahf',
 'lar',
 'lar',
 'lcall',
 'lcall',
-'lds',
 'lea',
 'lea',
 'leave',
 'leave',
-'les',
 'lfs',
 'lfs',
 'lgdt',
 'lgdt',
 'lgs',
 'lgs',
@@ -266,11 +254,7 @@
 'pmvnzb',
 'pmvnzb',
 'pmvzb',
 'pmvzb',
 'pop',
 'pop',
-'popa',
-'popal',
-'popaw',
 'popf',
 'popf',
-'popfl',
 'popfw',
 'popfw',
 'popfq',
 'popfq',
 'por',
 'por',
@@ -300,11 +284,7 @@
 'punpckldq',
 'punpckldq',
 'punpcklwd',
 'punpcklwd',
 'push',
 'push',
-'pusha',
-'pushal',
-'pushaw',
 'pushf',
 'pushf',
-'pushfl',
 'pushfw',
 'pushfw',
 'pushfq',
 'pushfq',
 'pxor',
 'pxor',
@@ -329,7 +309,6 @@
 'rsm',
 'rsm',
 'sahf',
 'sahf',
 'sal',
 'sal',
-'salc',
 'sar',
 'sar',
 'sbb',
 'sbb',
 'scasb',
 'scasb',
@@ -946,6 +925,7 @@
 'vzeroupper',
 'vzeroupper',
 'andn',
 'andn',
 'bextr',
 'bextr',
+'tzcnt',
 'rorx',
 'rorx',
 'sarx',
 'sarx',
 'shlx',
 'shlx',
@@ -969,5 +949,74 @@
 'vpsllvq',
 'vpsllvq',
 'vpsravd',
 'vpsravd',
 'vpsrlvd',
 'vpsrlvd',
-'vpsrlvq'
+'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
+'vfmadd132pd',
+'vfmadd213pd',
+'vfmadd231pd',
+'vfmaddpd',
+'vfmadd132ps',
+'vfmadd213ps',
+'vfmadd231ps',
+'vfmadd132sd',
+'vfmadd213sd',
+'vfmadd231sd',
+'vfmadd132ss',
+'vfmadd213ss',
+'vfmadd231ss',
+'vfmaddsub132pd',
+'vfmaddsub213pd',
+'vfmaddsub231pd',
+'vfmaddsub132ps',
+'vfmaddsub213ps',
+'vfmaddsub231ps',
+'vfmsubadd132pd',
+'vfmsubadd213pd',
+'vfmsubadd231pd',
+'vfmsubadd132ps',
+'vfmsubadd213ps',
+'vfmsubadd231ps',
+'vfmsub132pd',
+'vfmsub213pd',
+'vfmsub231pd',
+'vfmsub132ps',
+'vfmsub213ps',
+'vfmsub231ps',
+'vfmsub132sd',
+'vfmsub213sd',
+'vfmsub231sd',
+'vfmsub132ss',
+'vfmsub213ss',
+'vfmsub231ss',
+'vfnmadd132pd',
+'vfnmadd213pd',
+'vfnmadd231pd',
+'vfnmadd132ps',
+'vfnmadd213ps',
+'vfnmadd231ps',
+'vfnmadd132sd',
+'vfnmadd213sd',
+'vfnmadd231sd',
+'vfnmadd132ss',
+'vfnmadd213ss',
+'vfnmadd231ss',
+'vfnmsub132pd',
+'vfnmsub213pd',
+'vfnmsub231pd',
+'vfnmsub132ps',
+'vfnmsub213ps',
+'vfnmsub231ps',
+'vfnmsub132sd',
+'vfnmsub213sd',
+'vfnmsub231sd',
+'vfnmsub132ss',
+'vfnmsub213ss',
+'vfnmsub231ss'
 );
 );

+ 71 - 22
compiler/x86_64/x8664int.inc

@@ -1,15 +1,9 @@
 { don't edit, this file is generated from x86ins.dat }
 { don't edit, this file is generated from x86ins.dat }
 (
 (
 'none',
 'none',
-'aaa',
-'aad',
-'aam',
-'aas',
 'adc',
 'adc',
 'add',
 'add',
 'and',
 'and',
-'arpl',
-'bound',
 'bsf',
 'bsf',
 'bsr',
 'bsr',
 'bswap',
 'bswap',
@@ -35,8 +29,6 @@
 'cpuid',
 'cpuid',
 'cwd',
 'cwd',
 'cwde',
 'cwde',
-'daa',
-'das',
 'dec',
 'dec',
 'div',
 'div',
 'emms',
 'emms',
@@ -155,24 +147,20 @@
 'int1',
 'int1',
 'int03',
 'int03',
 'int3',
 'int3',
-'into',
 'invd',
 'invd',
 'invlpg',
 'invlpg',
 'iret',
 'iret',
 'iretd',
 'iretd',
 'iretw',
 'iretw',
 'iretq',
 'iretq',
-'jcxz',
 'jecxz',
 'jecxz',
 'jrcxz',
 'jrcxz',
 'jmp',
 'jmp',
 'lahf',
 'lahf',
 'lar',
 'lar',
 'lcall',
 'lcall',
-'lds',
 'lea',
 'lea',
 'leave',
 'leave',
-'les',
 'lfs',
 'lfs',
 'lgdt',
 'lgdt',
 'lgs',
 'lgs',
@@ -266,11 +254,7 @@
 'pmvnzb',
 'pmvnzb',
 'pmvzb',
 'pmvzb',
 'pop',
 'pop',
-'popa',
-'popad',
-'popaw',
 'popf',
 'popf',
-'popfd',
 'popfw',
 'popfw',
 'popfq',
 'popfq',
 'por',
 'por',
@@ -300,11 +284,7 @@
 'punpckldq',
 'punpckldq',
 'punpcklwd',
 'punpcklwd',
 'push',
 'push',
-'pusha',
-'pushad',
-'pushaw',
 'pushf',
 'pushf',
-'pushfd',
 'pushfw',
 'pushfw',
 'pushfq',
 'pushfq',
 'pxor',
 'pxor',
@@ -329,7 +309,6 @@
 'rsm',
 'rsm',
 'sahf',
 'sahf',
 'sal',
 'sal',
-'salc',
 'sar',
 'sar',
 'sbb',
 'sbb',
 'scasb',
 'scasb',
@@ -946,6 +925,7 @@
 'vzeroupper',
 'vzeroupper',
 'andn',
 'andn',
 'bextr',
 'bextr',
+'tzcnt',
 'rorx',
 'rorx',
 'sarx',
 'sarx',
 'shlx',
 'shlx',
@@ -969,5 +949,74 @@
 'vpsllvq',
 'vpsllvq',
 'vpsravd',
 'vpsravd',
 'vpsrlvd',
 'vpsrlvd',
-'vpsrlvq'
+'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
+'vfmadd132pd',
+'vfmadd213pd',
+'vfmadd231pd',
+'vfmaddpd',
+'vfmadd132ps',
+'vfmadd213ps',
+'vfmadd231ps',
+'vfmadd132sd',
+'vfmadd213sd',
+'vfmadd231sd',
+'vfmadd132ss',
+'vfmadd213ss',
+'vfmadd231ss',
+'vfmaddsub132pd',
+'vfmaddsub213pd',
+'vfmaddsub231pd',
+'vfmaddsub132ps',
+'vfmaddsub213ps',
+'vfmaddsub231ps',
+'vfmsubadd132pd',
+'vfmsubadd213pd',
+'vfmsubadd231pd',
+'vfmsubadd132ps',
+'vfmsubadd213ps',
+'vfmsubadd231ps',
+'vfmsub132pd',
+'vfmsub213pd',
+'vfmsub231pd',
+'vfmsub132ps',
+'vfmsub213ps',
+'vfmsub231ps',
+'vfmsub132sd',
+'vfmsub213sd',
+'vfmsub231sd',
+'vfmsub132ss',
+'vfmsub213ss',
+'vfmsub231ss',
+'vfnmadd132pd',
+'vfnmadd213pd',
+'vfnmadd231pd',
+'vfnmadd132ps',
+'vfnmadd213ps',
+'vfnmadd231ps',
+'vfnmadd132sd',
+'vfnmadd213sd',
+'vfnmadd231sd',
+'vfnmadd132ss',
+'vfnmadd213ss',
+'vfnmadd231ss',
+'vfnmsub132pd',
+'vfnmsub213pd',
+'vfnmsub231pd',
+'vfnmsub132ps',
+'vfnmsub213ps',
+'vfnmsub231ps',
+'vfnmsub132sd',
+'vfnmsub213sd',
+'vfnmsub231sd',
+'vfnmsub132ss',
+'vfnmsub213ss',
+'vfnmsub231ss'
 );
 );

+ 1 - 1
compiler/x86_64/x8664nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
 { don't edit, this file is generated from x86ins.dat }
-1849;
+1963;

+ 71 - 22
compiler/x86_64/x8664op.inc

@@ -1,15 +1,9 @@
 { don't edit, this file is generated from x86ins.dat }
 { don't edit, this file is generated from x86ins.dat }
 (
 (
 A_NONE,
 A_NONE,
-A_AAA,
-A_AAD,
-A_AAM,
-A_AAS,
 A_ADC,
 A_ADC,
 A_ADD,
 A_ADD,
 A_AND,
 A_AND,
-A_ARPL,
-A_BOUND,
 A_BSF,
 A_BSF,
 A_BSR,
 A_BSR,
 A_BSWAP,
 A_BSWAP,
@@ -35,8 +29,6 @@ A_CMPXCHG8B,
 A_CPUID,
 A_CPUID,
 A_CWD,
 A_CWD,
 A_CWDE,
 A_CWDE,
-A_DAA,
-A_DAS,
 A_DEC,
 A_DEC,
 A_DIV,
 A_DIV,
 A_EMMS,
 A_EMMS,
@@ -155,24 +147,20 @@ A_INT01,
 A_INT1,
 A_INT1,
 A_INT03,
 A_INT03,
 A_INT3,
 A_INT3,
-A_INTO,
 A_INVD,
 A_INVD,
 A_INVLPG,
 A_INVLPG,
 A_IRET,
 A_IRET,
 A_IRETD,
 A_IRETD,
 A_IRETW,
 A_IRETW,
 A_IRETQ,
 A_IRETQ,
-A_JCXZ,
 A_JECXZ,
 A_JECXZ,
 A_JRCXZ,
 A_JRCXZ,
 A_JMP,
 A_JMP,
 A_LAHF,
 A_LAHF,
 A_LAR,
 A_LAR,
 A_LCALL,
 A_LCALL,
-A_LDS,
 A_LEA,
 A_LEA,
 A_LEAVE,
 A_LEAVE,
-A_LES,
 A_LFS,
 A_LFS,
 A_LGDT,
 A_LGDT,
 A_LGS,
 A_LGS,
@@ -266,11 +254,7 @@ A_PMVLZB,
 A_PMVNZB,
 A_PMVNZB,
 A_PMVZB,
 A_PMVZB,
 A_POP,
 A_POP,
-A_POPA,
-A_POPAD,
-A_POPAW,
 A_POPF,
 A_POPF,
-A_POPFD,
 A_POPFW,
 A_POPFW,
 A_POPFQ,
 A_POPFQ,
 A_POR,
 A_POR,
@@ -300,11 +284,7 @@ A_PUNPCKLBW,
 A_PUNPCKLDQ,
 A_PUNPCKLDQ,
 A_PUNPCKLWD,
 A_PUNPCKLWD,
 A_PUSH,
 A_PUSH,
-A_PUSHA,
-A_PUSHAD,
-A_PUSHAW,
 A_PUSHF,
 A_PUSHF,
-A_PUSHFD,
 A_PUSHFW,
 A_PUSHFW,
 A_PUSHFQ,
 A_PUSHFQ,
 A_PXOR,
 A_PXOR,
@@ -329,7 +309,6 @@ A_RSLDT,
 A_RSM,
 A_RSM,
 A_SAHF,
 A_SAHF,
 A_SAL,
 A_SAL,
-A_SALC,
 A_SAR,
 A_SAR,
 A_SBB,
 A_SBB,
 A_SCASB,
 A_SCASB,
@@ -946,6 +925,7 @@ A_VZEROALL,
 A_VZEROUPPER,
 A_VZEROUPPER,
 A_ANDN,
 A_ANDN,
 A_BEXTR,
 A_BEXTR,
+A_TZCNT,
 A_RORX,
 A_RORX,
 A_SARX,
 A_SARX,
 A_SHLX,
 A_SHLX,
@@ -969,5 +949,74 @@ A_VPSLLVD,
 A_VPSLLVQ,
 A_VPSLLVQ,
 A_VPSRAVD,
 A_VPSRAVD,
 A_VPSRLVD,
 A_VPSRLVD,
-A_VPSRLVQ
+A_VPSRLVQ,
+A_VGATHERDPD,
+A_VGATHERDPS,
+A_VGATHERQPD,
+A_VGATHERQPS,
+A_VPGATHERDD,
+A_VPGATHERDQ,
+A_VPGATHERQD,
+A_VPGATHERQQ,
+A_VFMADD132PD,
+A_VFMADD213PD,
+A_VFMADD231PD,
+A_VFMADDPD,
+A_VFMADD132PS,
+A_VFMADD213PS,
+A_VFMADD231PS,
+A_VFMADD132SD,
+A_VFMADD213SD,
+A_VFMADD231SD,
+A_VFMADD132SS,
+A_VFMADD213SS,
+A_VFMADD231SS,
+A_VFMADDSUB132PD,
+A_VFMADDSUB213PD,
+A_VFMADDSUB231PD,
+A_VFMADDSUB132PS,
+A_VFMADDSUB213PS,
+A_VFMADDSUB231PS,
+A_VFMSUBADD132PD,
+A_VFMSUBADD213PD,
+A_VFMSUBADD231PD,
+A_VFMSUBADD132PS,
+A_VFMSUBADD213PS,
+A_VFMSUBADD231PS,
+A_VFMSUB132PD,
+A_VFMSUB213PD,
+A_VFMSUB231PD,
+A_VFMSUB132PS,
+A_VFMSUB213PS,
+A_VFMSUB231PS,
+A_VFMSUB132SD,
+A_VFMSUB213SD,
+A_VFMSUB231SD,
+A_VFMSUB132SS,
+A_VFMSUB213SS,
+A_VFMSUB231SS,
+A_VFNMADD132PD,
+A_VFNMADD213PD,
+A_VFNMADD231PD,
+A_VFNMADD132PS,
+A_VFNMADD213PS,
+A_VFNMADD231PS,
+A_VFNMADD132SD,
+A_VFNMADD213SD,
+A_VFNMADD231SD,
+A_VFNMADD132SS,
+A_VFNMADD213SS,
+A_VFNMADD231SS,
+A_VFNMSUB132PD,
+A_VFNMSUB213PD,
+A_VFNMSUB231PD,
+A_VFNMSUB132PS,
+A_VFNMSUB213PS,
+A_VFNMSUB231PS,
+A_VFNMSUB132SD,
+A_VFNMSUB213SD,
+A_VFNMSUB231SD,
+A_VFNMSUB132SS,
+A_VFNMSUB213SS,
+A_VFNMSUB231SS
 );
 );

+ 140 - 91
compiler/x86_64/x8664pro.inc

@@ -1,15 +1,9 @@
 { don't edit, this file is generated from x86ins.dat }
 { don't edit, this file is generated from x86ins.dat }
 (
 (
 (Ch: (Ch_None, Ch_None, Ch_None)),
 (Ch: (Ch_None, Ch_None, Ch_None)),
-(Ch: (Ch_MEAX, Ch_WFlags, Ch_None)),
-(Ch: (Ch_MEAX, Ch_WFlags, Ch_None)),
-(Ch: (Ch_MEAX, Ch_WFlags, Ch_None)),
-(Ch: (Ch_MEAX, Ch_WFlags, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_RWFlags)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_RWFlags)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_WFlags)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_WFlags)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_WFlags)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_WFlags)),
-(Ch: (Ch_WFlags, Ch_None, Ch_None)),
-(Ch: (Ch_Rop1, Ch_None, Ch_None)),
 (Ch: (Ch_Wop2, Ch_WFlags, Ch_Rop1)),
 (Ch: (Ch_Wop2, Ch_WFlags, Ch_Rop1)),
 (Ch: (Ch_Wop2, Ch_WFlags, Ch_Rop1)),
 (Ch: (Ch_Wop2, Ch_WFlags, Ch_Rop1)),
 (Ch: (Ch_MOp1, Ch_None, Ch_None)),
 (Ch: (Ch_MOp1, Ch_None, Ch_None)),
@@ -35,8 +29,6 @@
 (Ch: (Ch_All, Ch_None, Ch_none)),
 (Ch: (Ch_All, Ch_None, Ch_none)),
 (Ch: (Ch_MEAX, Ch_WEDX, Ch_None)),
 (Ch: (Ch_MEAX, Ch_WEDX, Ch_None)),
 (Ch: (Ch_MEAX, Ch_None, Ch_None)),
 (Ch: (Ch_MEAX, Ch_None, Ch_None)),
-(Ch: (Ch_MEAX, Ch_None, Ch_None)),
-(Ch: (Ch_MEAX, Ch_None, Ch_None)),
 (Ch: (Ch_Mop1, Ch_WFlags, Ch_None)),
 (Ch: (Ch_Mop1, Ch_WFlags, Ch_None)),
 (Ch: (Ch_RWEAX, Ch_WEDX, Ch_WFlags)),
 (Ch: (Ch_RWEAX, Ch_WEDX, Ch_WFlags)),
 (Ch: (Ch_FPU, Ch_None, Ch_None)),
 (Ch: (Ch_FPU, Ch_None, Ch_None)),
@@ -161,8 +153,6 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_RECX, Ch_None, Ch_None)),
 (Ch: (Ch_RECX, Ch_None, Ch_None)),
 (Ch: (Ch_RECX, Ch_None, Ch_None)),
 (Ch: (Ch_RECX, Ch_None, Ch_None)),
 (Ch: (Ch_RECX, Ch_None, Ch_None)),
 (Ch: (Ch_ROp1, Ch_None, Ch_None)),
 (Ch: (Ch_ROp1, Ch_None, Ch_None)),
@@ -170,10 +160,8 @@
 (Ch: (Ch_Wop2, Ch_None, Ch_None)),
 (Ch: (Ch_Wop2, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
-(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_RWESP, Ch_WEBP, Ch_None)),
 (Ch: (Ch_RWESP, Ch_WEBP, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
-(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_None, Ch_None, Ch_None)),
 (Ch: (Ch_None, Ch_None, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_None, Ch_None, Ch_None)),
 (Ch: (Ch_None, Ch_None, Ch_None)),
@@ -224,8 +212,8 @@
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -266,10 +254,6 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop1, Ch_RWESP, Ch_None)),
 (Ch: (Ch_Wop1, Ch_RWESP, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_RWESP, Ch_WFlags, Ch_None)),
 (Ch: (Ch_RWESP, Ch_WFlags, Ch_None)),
 (Ch: (Ch_RWESP, Ch_WFlags, Ch_None)),
 (Ch: (Ch_RWESP, Ch_WFLAGS, Ch_None)),
 (Ch: (Ch_RWESP, Ch_WFLAGS, Ch_None)),
 (Ch: (Ch_RWESP, Ch_WFlags, Ch_None)),
 (Ch: (Ch_RWESP, Ch_WFlags, Ch_None)),
@@ -300,10 +284,6 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Rop1, Ch_RWESP, Ch_None)),
 (Ch: (Ch_Rop1, Ch_RWESP, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_RWESP, Ch_RFlags, Ch_None)),
 (Ch: (Ch_RWESP, Ch_RFlags, Ch_None)),
 (Ch: (Ch_RWESP, Ch_RFlags, Ch_None)),
 (Ch: (Ch_RWESP, Ch_RFLAGS, Ch_None)),
 (Ch: (Ch_RWESP, Ch_RFLAGS, Ch_None)),
 (Ch: (Ch_RWESP, Ch_RFlags, Ch_None)),
 (Ch: (Ch_RWESP, Ch_RFlags, Ch_None)),
@@ -329,7 +309,6 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_WFlags, Ch_REAX, Ch_None)),
 (Ch: (Ch_WFlags, Ch_REAX, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_RWFlags)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_RWFlags)),
-(Ch: (Ch_WEAX, Ch_RFLAGS, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_WFlags)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_WFlags)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_RWFlags)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_RWFlags)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -389,7 +368,7 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_ROp1, Ch_WOp2, Ch_RFLAGS)),
+(Ch: (Ch_ROp1, Ch_RWOp2, Ch_RFLAGS)),
 (Ch: (Ch_RFLAGS, Ch_None, Ch_None)),
 (Ch: (Ch_RFLAGS, Ch_None, Ch_None)),
 (Ch: (Ch_RFLAGS, Ch_WOp1, Ch_None)),
 (Ch: (Ch_RFLAGS, Ch_WOp1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
@@ -489,8 +468,8 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -570,7 +549,7 @@
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -730,6 +709,10 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -753,6 +736,12 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -761,28 +750,20 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
@@ -796,7 +777,15 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -859,6 +848,14 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -867,6 +864,24 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -876,15 +891,26 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -893,8 +919,17 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Wop2, Ch_WFlags, Ch_Rop1)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -923,51 +958,65 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_None)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None))
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1))
 );
 );

+ 807 - 9
compiler/x86_64/x8664tab.inc

@@ -2544,17 +2544,10 @@
   (
   (
     opcode  : A_LEA;
     opcode  : A_LEA;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg32 or ot_bits64,ot_memory,ot_none,ot_none);
-    code    : #208#1#141#72;
+    optypes : (ot_reg16 or ot_bits32 or ot_bits64,ot_memory,ot_none,ot_none);
+    code    : #193#208#1#141#72;
     flags   : if_8086
     flags   : if_8086
   ),
   ),
-  (
-    opcode  : A_LEA;
-    ops     : 2;
-    optypes : (ot_reg32 or ot_bits64,ot_immediate,ot_none,ot_none);
-    code    : #208#1#141#72;
-    flags   : if_8086 or if_sd
-  ),
   (
   (
     opcode  : A_LEAVE;
     opcode  : A_LEAVE;
     ops     : 0;
     ops     : 0;
@@ -12579,6 +12572,13 @@
     code    : #242#243#249#1#247#62#72;
     code    : #242#243#249#1#247#62#72;
     flags   : if_bmi1 or if_x86_64
     flags   : if_bmi1 or if_x86_64
   ),
   ),
+  (
+    opcode  : A_TZCNT;
+    ops     : 2;
+    optypes : (ot_reg16 or ot_bits32 or ot_bits64,ot_rm_gpr,ot_none,ot_none);
+    code    : #208#219#2#15#188#72;
+    flags   : if_bmi1 or if_sm
+  ),
   (
   (
     opcode  : A_RORX;
     opcode  : A_RORX;
     ops     : 3;
     ops     : 3;
@@ -12942,5 +12942,803 @@
     optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
     optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
     code    : #241#242#243#249#1#69#61#80;
     code    : #241#242#243#249#1#69#61#80;
     flags   : if_avx2
     flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VFMADD132PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#152#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD132PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#152#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD213PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#168#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD213PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#168#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD231PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#184#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD231PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#184#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDPD;
+    ops     : 4;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_xmmreg);
+    code    : #241#242#250#1#105#61#80#247;
+    flags   : if_fma4
+  ),
+  (
+    opcode  : A_VFMADDPD;
+    ops     : 4;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmreg,ot_xmmrm);
+    code    : #241#242#250#243#1#105#61#88#246;
+    flags   : if_fma4
+  ),
+  (
+    opcode  : A_VFMADD132PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#152#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD132PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#152#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD213PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#168#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD213PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#168#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD231PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#184#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD231PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#184#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD132SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#153#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD213SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#169#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD231SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#185#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD132SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#153#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD213SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#169#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD231SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#185#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB132PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#150#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB132PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#150#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB213PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#166#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB213PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#166#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB231PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#182#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB231PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#182#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB132PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#150#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB132PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#150#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB213PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#166#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB213PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#166#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB231PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#182#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB231PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#182#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD132PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#151#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD132PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#151#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD213PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#167#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD213PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#167#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD231PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#183#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD231PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#183#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD132PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#151#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD132PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#151#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD213PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#167#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD213PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#167#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD231PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#183#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD231PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#183#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB132PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#154#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB132PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#154#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB213PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#170#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB213PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#170#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB231PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#186#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB231PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#186#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB132PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#154#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB132PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#154#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB213PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#170#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB213PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#170#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB231PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#186#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB231PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#186#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB132SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#155#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB213SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#171#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB231SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#187#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB132SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#155#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB213SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#171#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB231SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#187#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD132PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#156#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD132PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#156#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD213PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#172#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD213PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#172#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD231PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#188#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD231PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#188#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD132PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#156#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD132PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#156#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD213PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#172#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD213PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#172#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD231PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#188#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD231PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#188#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD132SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#157#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD213SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#173#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD231SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#189#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD132SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#157#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD213SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#173#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD231SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#189#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB132PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#158#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB132PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#158#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB213PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#174#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB213PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#174#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB231PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#190#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB231PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#190#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB132PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#158#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB132PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#158#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB213PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#174#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB213PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#174#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB231PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#190#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB231PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#190#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB132SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#159#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB213SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#175#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB231SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#191#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB132SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#159#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB213SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#175#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB231SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#191#61#80;
+    flags   : if_fma
   )
   )
 );
 );