Browse Source

merge with trunk

git-svn-id: branches/tg74/avx2@28411 -
tg74 11 years ago
parent
commit
e9f2dcabc5

+ 1 - 1
.gitattributes

@@ -806,7 +806,6 @@ compiler/x86_64/r8664int.inc svneol=native#text/plain
 compiler/x86_64/r8664iri.inc svneol=native#text/plain
 compiler/x86_64/r8664nor.inc svneol=native#text/plain
 compiler/x86_64/r8664num.inc svneol=native#text/plain
-compiler/x86_64/r8664op.inc svneol=native#text/plain
 compiler/x86_64/r8664ot.inc svneol=native#text/plain
 compiler/x86_64/r8664rni.inc svneol=native#text/plain
 compiler/x86_64/r8664sri.inc svneol=native#text/plain
@@ -815,6 +814,7 @@ compiler/x86_64/r8664std.inc svneol=native#text/plain
 compiler/x86_64/rax64att.pas svneol=native#text/plain
 compiler/x86_64/rax64int.pas svneol=native#text/plain
 compiler/x86_64/rgcpu.pas svneol=native#text/plain
+compiler/x86_64/symcpu.pas svneol=native#text/plain
 compiler/x86_64/win64unw.pas svneol=native#text/plain
 compiler/x86_64/x8664ats.inc svneol=native#text/plain
 compiler/x86_64/x8664att.inc svneol=native#text/plain

+ 209 - 1
compiler/x86_64/aoptcpu.pas

@@ -63,6 +63,94 @@ begin
     end;
 end;
 
+
+function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
+  begin
+    result :=
+      (instr.typ = ait_instruction) and
+      (taicpu(instr).opcode = op) and
+      ((opsize = []) or (taicpu(instr).opsize in opsize));
+  end;
+
+
+function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
+  begin
+    result :=
+      (instr.typ = ait_instruction) and
+      ((taicpu(instr).opcode = op1) or
+       (taicpu(instr).opcode = op2)
+      ) and
+      ((opsize = []) or (taicpu(instr).opsize in opsize));
+  end;
+
+
+function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
+  begin
+    result :=
+      (instr.typ = ait_instruction) and
+      ((taicpu(instr).opcode = op1) or
+       (taicpu(instr).opcode = op2) or
+       (taicpu(instr).opcode = op3)
+      ) and
+      ((opsize = []) or (taicpu(instr).opsize in opsize));
+  end;
+
+
+function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
+  begin
+    result := (oper.typ = top_reg) and (oper.reg = reg);
+  end;
+
+
+function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
+  begin
+    result := (oper.typ = top_const) and (oper.val = a);
+  end;
+
+function refsequal(const r1, r2: treference): boolean;
+  begin
+    refsequal :=
+      (r1.offset = r2.offset) and
+      (r1.segment = r2.segment) and (r1.base = r2.base) and
+      (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
+      (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
+      (r1.relsymbol = r2.relsymbol);
+  end;
+
+
+function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
+  begin
+    result := oper1.typ = oper2.typ;
+
+    if result then
+      case oper1.typ of
+        top_const:
+          Result:=oper1.val = oper2.val;
+        top_reg:
+          Result:=oper1.reg = oper2.reg;
+        top_ref:
+          Result:=RefsEqual(oper1.ref^, oper2.ref^);
+        else
+          internalerror(2013102801);
+      end
+  end;
+
+
+function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
+  begin
+   Result:=(ref.offset=0) and
+     (ref.scalefactor in [0,1]) and
+     (ref.segment=NR_NO) and
+     (ref.symbol=nil) and
+     (ref.relsymbol=nil) and
+     ((base=NR_INVALID) or
+      (ref.base=base)) and
+     ((index=NR_INVALID) or
+      (ref.index=index));
+  end;
+
+
+
 function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
 var
   next1: tai;
@@ -99,7 +187,66 @@ begin
               asml.remove(p);
               p.Free;
               p:=hp1;
-            end;
+            end
+          else if (taicpu(p).oper[0]^.typ = top_const) and
+            (taicpu(p).oper[1]^.typ = top_reg) and
+            GetNextInstruction(p, hp1) and
+            MatchInstruction(hp1,A_MOVZX,[]) and
+            (taicpu(hp1).oper[0]^.typ = top_reg) and
+            MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
+            (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
+             (((taicpu(p).opsize=S_W) and
+               (taicpu(hp1).opsize=S_BW)) or
+              ((taicpu(p).opsize=S_L) and
+               (taicpu(hp1).opsize in [S_WL,S_BL])) or
+               ((taicpu(p).opsize=S_Q) and
+               (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
+              ) then
+                begin
+                  if (((taicpu(hp1).opsize) in [S_BW,S_BL,S_BQ]) and
+                      ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)) or
+                     (((taicpu(hp1).opsize) in [S_WL,S_WQ]) and
+                      ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val)) or
+                     (((taicpu(hp1).opsize)=S_LQ) and
+                      ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
+                     ) then
+                     begin
+                       if (cs_asm_source in current_settings.globalswitches) then
+                         asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,AndMovzToAnd')),p);
+                       asml.remove(hp1);
+                       hp1.free;
+                     end;
+                end
+          else if (taicpu(p).oper[0]^.typ = top_const) and
+            (taicpu(p).oper[1]^.typ = top_reg) and
+            GetNextInstruction(p, hp1) and
+            MatchInstruction(hp1,A_MOVSX,A_MOVSXD,[]) and
+            (taicpu(hp1).oper[0]^.typ = top_reg) and
+            MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
+            (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
+             (((taicpu(p).opsize=S_W) and
+               (taicpu(hp1).opsize=S_BW)) or
+              ((taicpu(p).opsize=S_L) and
+               (taicpu(hp1).opsize in [S_WL,S_BL])) or
+               ((taicpu(p).opsize=S_Q) and
+               (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
+              ) then
+                begin
+                  if (((taicpu(hp1).opsize) in [S_BW,S_BL,S_BQ]) and
+                      ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)) or
+                     (((taicpu(hp1).opsize) in [S_WL,S_WQ]) and
+                      ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val)) or
+                     (((taicpu(hp1).opsize)=S_LQ) and
+                      ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
+                     ) then
+                     begin
+                       if (cs_asm_source in current_settings.globalswitches) then
+                         asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,AndMovsxToAnd')),p);
+                       asml.remove(hp1);
+                       hp1.free;
+                     end;
+                end;
+
 (*                      else
   {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
   jump, but only if it's a conditional jump (PFV) }
@@ -134,6 +281,41 @@ begin
                       hp1.free;
                     end;
               end
+            { Next instruction is also a MOV ? }
+            else if GetNextIntruction_p and
+              MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
+              begin
+                if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
+                   (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
+                    {mov reg1, mem1     or     mov mem1, reg1
+                     mov mem2, reg2            mov reg2, mem2}
+                  begin
+                    if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
+                      {mov reg1, mem1     or     mov mem1, reg1
+                       mov mem2, reg1            mov reg2, mem1}
+                      begin
+                        if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
+                          { Removes the second statement from
+                            mov reg1, mem1/reg2
+                            mov mem1/reg2, reg1 }
+                          begin
+                            { if (taicpu(p).oper[0]^.typ = top_reg) then
+                              AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs); }
+                            if (cs_asm_source in current_settings.globalswitches) then
+                              asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,MovMov2Mov1')),p);
+                            asml.remove(hp1);
+                            hp1.free;
+                          end;
+                      end
+                    else if (taicpu(p).oper[1]^.typ=top_ref) and
+                      OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
+                      begin
+                        taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
+                        if (cs_asm_source in current_settings.globalswitches) then
+                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,MovMov2MovMov1')),p);
+                      end;
+                  end
+              end
             else if (taicpu(p).oper[1]^.typ = top_reg) and
               GetNextIntruction_p and
               (hp1.typ = ait_instruction) and
@@ -482,6 +664,32 @@ begin
                 end;
             end;
           end;
+        A_VDIVSD,
+        A_VDIVSS,
+        A_VSUBSD,
+        A_VSUBSS,
+        A_VMULSD,
+        A_VMULSS,
+        A_VADDSD,
+        A_VADDSS:
+          begin
+            if GetNextInstruction(p,hp1) and
+              { we mix single and double opperations here because we assume that the compiler
+                generates vmovapd only after double operations and vmovaps only after single operations }
+              MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
+              MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
+              (taicpu(hp1).oper[1]^.typ=top_reg) then
+              begin
+                CopyUsedRegs(TmpUsedRegs);
+                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
+                If not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
+                  begin
+                    taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
+                    asml.Remove(hp1);
+                    hp1.Free;
+                  end;
+              end;
+          end;
         end;
       end;
     end;

+ 14 - 15
compiler/x86_64/cgcpu.pas

@@ -57,12 +57,12 @@ unit cgcpu;
     uses
        globtype,globals,verbose,systems,cutils,cclasses,
        symsym,symtable,defutil,paramgr,fmodule,cpupi,
-       rgobj,tgobj,rgcpu;
+       rgobj,tgobj,rgcpu,ncgutil;
 
 
     procedure Tcgx86_64.init_register_allocators;
       const
-        win64_saved_std_regs : array[0..6] of tsuperregister = (RS_RBX,RS_RDI,RS_RSI,RS_R12,RS_R13,RS_R14,RS_R15);
+        win64_saved_std_regs : array[0..7] of tsuperregister = (RS_RBX,RS_RDI,RS_RSI,RS_R12,RS_R13,RS_R14,RS_R15,RS_RBP);
         others_saved_std_regs : array[0..4] of tsuperregister = (RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15);
         saved_regs_length : array[boolean] of longint = (5,7);
 
@@ -96,8 +96,16 @@ unit cgcpu;
               end;
           end;
         if target_info.system=system_x86_64_win64 then
-          rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_R8,RS_R9,RS_R10,
-            RS_R11,RS_RBX,RS_RSI,RS_RDI,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[])
+          begin
+            if (cs_userbp in current_settings.optimizerswitches) and assigned(current_procinfo) and (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
+              begin
+                rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_R8,RS_R9,RS_R10,
+                  RS_R11,RS_RBX,RS_RSI,RS_RDI,RS_R12,RS_R13,RS_R14,RS_R15,RS_RBP],first_int_imreg,[]);
+              end
+            else
+              rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_R8,RS_R9,RS_R10,
+                RS_R11,RS_RBX,RS_RSI,RS_RDI,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[])
+          end
         else
           rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_RSI,RS_RDI,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[]);
@@ -140,7 +148,6 @@ unit cgcpu;
         frame_offset: longint;
         suppress_endprologue: boolean;
         stackmisalignment: longint;
-        para: tparavarsym;
         xmmsize: longint;
 
       procedure push_one_reg(reg: tregister);
@@ -196,15 +203,7 @@ unit cgcpu;
                 else
                   begin
                     push_regs;
-                    { load framepointer from hidden $parentfp parameter }
-                    para:=tparavarsym(current_procinfo.procdef.paras[0]);
-                    if not (vo_is_parentfp in para.varoptions) then
-                      InternalError(201201142);
-                    if (para.paraloc[calleeside].location^.loc<>LOC_REGISTER) or
-                       (para.paraloc[calleeside].location^.next<>nil) then
-                      InternalError(201201143);
-                    list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],
-                      para.paraloc[calleeside].location^.register,NR_FRAME_POINTER_REG));
+                    gen_load_frame_for_exceptfilter(list);
                     { Need only as much stack space as necessary to do the calls.
                       Exception filters don't have own local vars, and temps are 'mapped'
                       to the parent procedure.
@@ -488,7 +487,7 @@ unit cgcpu;
         a_loadaddr_ref_cgpara(list,href,para2);
         paramanager.freecgpara(list,para2);
         paramanager.freecgpara(list,para1);
-        g_call(current_asmdata.CurrAsmList,'_FPC_local_unwind');
+        g_call(list,'_FPC_local_unwind');
         para2.done;
         para1.done;
       end;

+ 4 - 0
compiler/x86_64/cpubase.inc

@@ -38,6 +38,8 @@ type
     S_YMM
   );
 
+  TOpSizes = set of topsize;
+
 {*****************************************************************************
                                   Registers
 *****************************************************************************}
@@ -126,6 +128,8 @@ const
       { these arrays differ between unix and win64 }
       saved_standard_registers : array of tsuperregister = nil;
       saved_mm_registers : array of tsuperregister = nil;
+
+      saved_address_registers : array[0..0] of tsuperregister = (RS_INVALID);
       { Required parameter alignment when calling a routine declared as
         stdcall and cdecl. The alignment value should be the one defined
         by GCC or the target ABI.

+ 32 - 4
compiler/x86_64/cpuinfo.pas

@@ -40,7 +40,10 @@ Type
 
    tcputype =
       (cpu_none,
-       cpu_athlon64
+       cpu_athlon64,
+       cpu_core_i,
+       cpu_core_avx,
+       cpu_core_avx2
       );
 
    tfputype =
@@ -77,7 +80,10 @@ Const
    ];
 
    cputypestr : array[tcputype] of string[10] = ('',
-     'ATHLON64'
+     'ATHLON64',
+     'COREI',
+     'COREAVX',
+     'COREAVX2'
    );
 
    fputypestr : array[tfputype] of string[6] = ('',
@@ -102,14 +108,36 @@ Const
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_stackframe,
+                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_stackframe,cs_userbp,
 				  cs_opt_tailrecursion,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches + 
      [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
-   level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
+   level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [cs_userbp];
+
+type
+   tcpuflags =
+      (CPUX86_HAS_SSEUNIT,
+       CPUX86_HAS_BMI1,
+       CPUX86_HAS_BMI2,
+       CPUX86_HAS_POPCNT,
+       CPUX86_HAS_AVXUNIT,
+       CPUX86_HAS_LZCNT,
+       CPUX86_HAS_MOVBE,
+       CPUX86_HAS_FMA,
+       CPUX86_HAS_FMA4
+      );
+
+ const
+   cpu_capabilities : array[tcputype] of set of tcpuflags = (
+     { cpu_none      } [],
+     { Athlon64      } [CPUX86_HAS_SSEUNIT],
+     { cpu_core_i    } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
+     { cpu_core_avx  } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
+     { cpu_core_avx2 } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
+   );
 
 Implementation
 

+ 3 - 1
compiler/x86_64/cpunode.pas

@@ -56,7 +56,9 @@ unit cpunode;
        nx64flw,
 {$endif DISABLE_WIN64_SEH}
        nx64inl,
-       nx64set
+       nx64set,
+       { symtable }
+       symcpu
        ;
 
 end.

+ 14 - 57
compiler/x86_64/cpupara.pas

@@ -41,7 +41,6 @@ unit cpupara;
           function param_use_paraloc(const cgpara:tcgpara):boolean;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
-          procedure getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
           function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
@@ -521,7 +520,10 @@ unit cpupara;
         num: longint;
         isbitpacked: boolean;
       begin
+        size:=0;
+        bitoffset:=0;
         result:=init_aggregate_classification(def,varspez,byte_offset,words,classes);
+
         if (words=0) then
           exit;
 
@@ -889,62 +891,6 @@ unit cpupara;
       end;
 
 
-    procedure tx86_64paramanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
-      var
-        paraloc : pcgparalocation;
-        psym : tparavarsym;
-        pdef : tdef;
-      begin
-        psym:=tparavarsym(pd.paras[nr-1]);
-        pdef:=psym.vardef;
-        if push_addr_param(psym.varspez,pdef,pd.proccalloption) then
-          pdef:=getpointerdef(pdef);
-        cgpara.reset;
-        cgpara.size:=def_cgsize(pdef);
-        cgpara.intsize:=tcgsize2size[cgpara.size];
-        cgpara.alignment:=get_para_align(pd.proccalloption);
-        cgpara.def:=pdef;
-        paraloc:=cgpara.add_location;
-        with paraloc^ do
-         begin
-           size:=def_cgsize(pdef);
-           paraloc^.def:=pdef;
-           if target_info.system=system_x86_64_win64 then
-             begin
-               if nr<1 then
-                 internalerror(200304303)
-               else if nr<=high(paraintsupregs_winx64)+1 then
-                 begin
-                    loc:=LOC_REGISTER;
-                    register:=newreg(R_INTREGISTER,paraintsupregs_winx64[nr-1],cgsize2subreg(R_INTREGISTER,size));
-                 end
-               else
-                 begin
-                    loc:=LOC_REFERENCE;
-                    reference.index:=NR_STACK_POINTER_REG;
-                    reference.offset:=(nr-6)*sizeof(aint);
-                 end;
-             end
-           else
-             begin
-               if nr<1 then
-                 internalerror(200304303)
-               else if nr<=high(paraintsupregs)+1 then
-                 begin
-                    loc:=LOC_REGISTER;
-                    register:=newreg(R_INTREGISTER,paraintsupregs[nr-1],cgsize2subreg(R_INTREGISTER,size));
-                 end
-               else
-                 begin
-                    loc:=LOC_REFERENCE;
-                    reference.index:=NR_STACK_POINTER_REG;
-                    reference.offset:=(nr-6)*sizeof(aint);
-                 end;
-             end;
-          end;
-      end;
-
-
     function tx86_64paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
       const
         intretregs: array[0..1] of tregister = (NR_FUNCTION_RETURN_REG,NR_FUNCTION_RETURN_REG_HIGH);
@@ -1110,6 +1056,7 @@ unit cpupara;
         i,
         varalign,
         paraalign  : longint;
+        sym: tfieldvarsym;
       begin
         paraalign:=get_para_align(p.proccalloption);
         { Register parameters are assigned from left to right }
@@ -1117,6 +1064,16 @@ unit cpupara;
           begin
             hp:=tparavarsym(paras[i]);
             paradef:=hp.vardef;
+            { on win64, if a record has only one field and that field is a
+              single or double, it has to be handled like a single/double }
+            if (target_info.system=system_x86_64_win64) and
+               ((paradef.typ=recorddef) {or
+               is_object(paradef)}) and
+               tabstractrecordsymtable(tabstractrecorddef(paradef).symtable).has_single_field(sym) and
+               (sym.vardef.typ=floatdef) and
+               (tfloatdef(sym.vardef).floattype in [s32real,s64real]) then
+              paradef:=sym.vardef;
+
             pushaddr:=push_addr_param(hp.varspez,paradef,p.proccalloption);
             if pushaddr then
               begin

+ 3 - 0
compiler/x86_64/cputarg.pas

@@ -60,6 +60,9 @@ implementation
     {$ifndef NOAGX86_64ATT}
       ,agx86att
     {$endif}
+    {$ifndef NOAGX86_64NSM}
+      ,agx86nsm
+    {$endif}
 
       ,ogcoff
       ,ogelf

+ 75 - 52
compiler/x86_64/nx64add.pas

@@ -37,10 +37,10 @@ interface
   implementation
 
     uses
-      globtype,globals,
+      globtype,globals,verbose,
       aasmbase,aasmtai,aasmdata,
       symdef,defutil,
-      cgbase,cgutils,cga,cgobj,hlcgobj,
+      cgbase,cgutils,cga,cgobj,hlcgobj,cgx86,
       tgobj;
 
 {*****************************************************************************
@@ -49,8 +49,10 @@ interface
 
     procedure tx8664addnode.second_addordinal;
     begin
-      { filter unsigned MUL opcode, which requires special handling }
+      { filter unsigned MUL opcode, which requires special handling.
+        Note that when overflow checking is off, we can use IMUL instead. }
       if (nodetype=muln) and
+        (cs_check_overflow in current_settings.localswitches) and
         (not(is_signed(left.resultdef)) or
          not(is_signed(right.resultdef))) then
       begin
@@ -66,59 +68,80 @@ interface
 *****************************************************************************}
 
     procedure tx8664addnode.second_mul;
-
-    var reg:Tregister;
+      var
+        reg,rega,regd:Tregister;
         ref:Treference;
         use_ref:boolean;
         hl4 : tasmlabel;
-
-    begin
-      pass_left_right;
-
-      { The location.register will be filled in later (JM) }
-      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
-      { Mul supports registers and references, so if not register/reference,
-        load the location into a register}
-      use_ref:=false;
-      if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
-        reg:=left.location.register
-      else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
-        begin
-          ref:=left.location.reference;
-          use_ref:=true;
-        end
-      else
-        begin
-          {LOC_CONSTANT for example.}
-          reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,left.resultdef,osuinttype,left.location,reg);
+        cgsize:TCgSize;
+        opsize:topsize;
+      begin
+        reference_reset(ref,0);
+        reg:=NR_NO;
+
+        cgsize:=def_cgsize(resultdef);
+        opsize:=TCGSize2OpSize[cgsize];
+        case cgsize of
+          OS_S64,OS_64:
+            begin
+              rega:=NR_RAX;
+              regd:=NR_RDX;
+            end;
+          OS_S32,OS_32:
+            begin
+              rega:=NR_EAX;
+              regd:=NR_EDX;
+            end;
+          else
+            internalerror(2013102703);
         end;
-      { Allocate RAX. }
-      cg.getcpuregister(current_asmdata.CurrAsmList,NR_RAX);
-      { Load the right value. }
-      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_RAX);
-      { Also allocate RDX, since it is also modified by a mul (JM). }
-      cg.getcpuregister(current_asmdata.CurrAsmList,NR_RDX);
-      if use_ref then
-        emit_ref(A_MUL,S_Q,ref)
-      else
-        emit_reg(A_MUL,S_Q,reg);
-      if cs_check_overflow in current_settings.localswitches  then
-       begin
-         current_asmdata.getjumplabel(hl4);
-         cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
-         cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
-         cg.a_label(current_asmdata.CurrAsmList,hl4);
-       end;
-      { Free RDX,RAX }
-      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RDX);
-      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RAX);
-      { Allocate a new register and store the result in RAX in it. }
-      location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-      emit_reg_reg(A_MOV,S_Q,NR_RAX,location.register);
-      location_freetemp(current_asmdata.CurrAsmList,left.location);
-      location_freetemp(current_asmdata.CurrAsmList,right.location);
-    end;
+
+        pass_left_right;
+
+        { The location.register will be filled in later (JM) }
+        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+        { Mul supports registers and references, so if not register/reference,
+          load the location into a register}
+        use_ref:=false;
+        if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
+          reg:=left.location.register
+        else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
+          begin
+            ref:=left.location.reference;
+            use_ref:=true;
+          end
+        else
+          begin
+            {LOC_CONSTANT for example.}
+            reg:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,left.location,reg);
+          end;
+        { Allocate RAX. }
+        cg.getcpuregister(current_asmdata.CurrAsmList,rega);
+        { Load the right value. }
+        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,resultdef,right.location,rega);
+        { Also allocate RDX, since it is also modified by a mul (JM). }
+        cg.getcpuregister(current_asmdata.CurrAsmList,regd);
+        if use_ref then
+          emit_ref(A_MUL,opsize,ref)
+        else
+          emit_reg(A_MUL,opsize,reg);
+        if cs_check_overflow in current_settings.localswitches  then
+         begin
+           current_asmdata.getjumplabel(hl4);
+           cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
+           cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
+           cg.a_label(current_asmdata.CurrAsmList,hl4);
+         end;
+        { Free RDX,RAX }
+        cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
+        cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
+        { Allocate a new register and store the result in RAX in it. }
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+        emit_reg_reg(A_MOV,opsize,rega,location.register);
+        location_freetemp(current_asmdata.CurrAsmList,left.location);
+        location_freetemp(current_asmdata.CurrAsmList,right.location);
+      end;
 
 
 begin

+ 44 - 78
compiler/x86_64/nx64flw.pas

@@ -135,41 +135,6 @@ procedure tx64onnode.pass_generate_code;
   end;
 
 { tx64tryfinallynode }
-var
-  seq: longint=0;
-
-
-function create_pd: tprocdef;
-  var
-    st:TSymTable;
-    checkstack: psymtablestackitem;
-    sym:tprocsym;
-  begin
-    { get actual procedure symtable (skip withsymtables, etc.) }
-    st:=nil;
-    checkstack:=symtablestack.stack;
-    while assigned(checkstack) do
-      begin
-        st:=checkstack^.symtable;
-          if st.symtabletype in [staticsymtable,globalsymtable,localsymtable] then
-            break;
-          checkstack:=checkstack^.next;
-      end;
-    { Create a nested procedure, even from main_program_level. }
-    result:=tprocdef.create(max(normal_function_level,st.symtablelevel)+1);
-    result.struct:=current_procinfo.procdef.struct;
-    result.proctypeoption:=potype_exceptfilter;
-    handle_calling_convention(result);
-    sym:=tprocsym.create('$fin$'+tostr(seq));
-    st.insert(sym);
-    inc(seq);
-
-    result.procsym:=sym;
-    proc_add_definition(result);
-    result.forwarddef:=false;
-    result.aliasnames.insert(result.mangledname);
-    alloc_proc_symbol(result);
-  end;
 
 function reset_regvars(var n: tnode; arg: pointer): foreachnoderesult;
   begin
@@ -194,47 +159,53 @@ function copy_parasize(var n: tnode; arg: pointer): foreachnoderesult;
 constructor tx64tryfinallynode.create(l, r: TNode);
   begin
     inherited create(l,r);
-    if (target_info.system<>system_x86_64_win64) or (
+    if (target_info.system=system_x86_64_win64) and
+       (
       { Don't create child procedures for generic methods, their nested-like
         behavior causes compilation errors because real nested procedures
         aren't allowed for generics. Not creating them doesn't harm because
         generic node tree is discarded without generating code. }
-        assigned(current_procinfo.procdef.struct) and
-        (df_generic in current_procinfo.procdef.struct.defoptions)
-      ) then
-      exit;
-    finalizepi:=tcgprocinfo(cprocinfo.create(current_procinfo));
-    finalizepi.force_nested;
-    finalizepi.procdef:=create_pd;
-    finalizepi.entrypos:=r.fileinfo;
-    finalizepi.entryswitches:=r.localswitches;
-    finalizepi.exitpos:=current_filepos; // last_endtoken_pos?
-    finalizepi.exitswitches:=current_settings.localswitches;
-    { Regvar optimization for symbols is suppressed when using exceptions, but
-      temps may be still placed into registers. This must be fixed. }
-    foreachnodestatic(r,@reset_regvars,finalizepi);
+        not assigned(current_procinfo.procdef.struct) or
+        not(df_generic in current_procinfo.procdef.struct.defoptions)
+       ) then
+      begin
+        finalizepi:=tcgprocinfo(cprocinfo.create(current_procinfo));
+        finalizepi.force_nested;
+        finalizepi.procdef:=create_finalizer_procdef;
+        finalizepi.entrypos:=r.fileinfo;
+        finalizepi.entryswitches:=r.localswitches;
+        finalizepi.exitpos:=current_filepos; // last_endtoken_pos?
+        finalizepi.exitswitches:=current_settings.localswitches;
+        { the init/final code is messing with asm nodes, so inform the compiler about this }
+        include(finalizepi.flags,pi_has_assembler_block);
+        { Regvar optimization for symbols is suppressed when using exceptions, but
+          temps may be still placed into registers. This must be fixed. }
+        foreachnodestatic(r,@reset_regvars,finalizepi);
+      end;
   end;
 
 constructor tx64tryfinallynode.create_implicit(l, r, _t1: TNode);
   begin
     inherited create_implicit(l, r, _t1);
-    if (target_info.system<>system_x86_64_win64) then
-      exit;
-
-    if assigned(current_procinfo.procdef.struct) and
-      (df_generic in current_procinfo.procdef.struct.defoptions) then
-      InternalError(2013012501);
-
-    finalizepi:=tcgprocinfo(cprocinfo.create(current_procinfo));
-    finalizepi.force_nested;
-    finalizepi.procdef:=create_pd;
-
-    finalizepi.entrypos:=current_filepos;
-    finalizepi.exitpos:=current_filepos; // last_endtoken_pos?
-    finalizepi.entryswitches:=r.localswitches;
-    finalizepi.exitswitches:=current_settings.localswitches;
-    include(finalizepi.flags,pi_do_call);
-    finalizepi.allocate_push_parasize(32);
+    if (target_info.system=system_x86_64_win64) then
+      begin
+        if assigned(current_procinfo.procdef.struct) and
+          (df_generic in current_procinfo.procdef.struct.defoptions) then
+          InternalError(2013012501);
+
+        finalizepi:=tcgprocinfo(cprocinfo.create(current_procinfo));
+        finalizepi.force_nested;
+        finalizepi.procdef:=create_finalizer_procdef;
+
+        finalizepi.entrypos:=current_filepos;
+        finalizepi.exitpos:=current_filepos; // last_endtoken_pos?
+        finalizepi.entryswitches:=r.localswitches;
+        finalizepi.exitswitches:=current_settings.localswitches;
+        include(finalizepi.flags,pi_do_call);
+        { the init/final code is messing with asm nodes, so inform the compiler about this }
+        include(finalizepi.flags,pi_has_assembler_block);
+        finalizepi.allocate_push_parasize(32);
+      end;
   end;
 
 function tx64tryfinallynode.simplify(forinline: boolean): tnode;
@@ -352,22 +323,13 @@ procedure tx64tryfinallynode.pass_generate_code;
       encode everything into a single scope record. }
     if catch_frame then
       begin
-        flowcontrol:=[fc_inflowcontrol];
-        secondpass(t1);
-        { note 1: this is not a 'finally' block, no flow restrictions apply
-          note 2: it contains autogenerated sequential code, flow away is impossible }
-        if flowcontrol<>[fc_inflowcontrol] then
-          CGMessage(cg_e_control_flow_outside_finally);
-        if codegenerror then
-          exit;
-
         if (current_procinfo.procdef.proccalloption=pocall_safecall) then
           begin
             handle_safecall_exception;
             cg.a_jmp_always(current_asmdata.CurrAsmList,endfinallylabel);
           end
         else
-          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_RERAISE_IMPLICIT',false);
+          InternalError(2014031601);
       end;
 
     flowcontrol:=[fc_inflowcontrol];
@@ -427,6 +389,10 @@ procedure tx64tryexceptnode.pass_generate_code;
     location_reset(location,LOC_VOID,OS_NO);
 
     oldflowcontrol:=flowcontrol;
+    exceptflowcontrol:=[];
+    continueexceptlabel:=nil;
+    breakexceptlabel:=nil;
+
     flowcontrol:=flowcontrol*[fc_unwind]+[fc_inflowcontrol];
     { this can be called recursivly }
     oldBreakLabel:=nil;
@@ -495,7 +461,7 @@ procedure tx64tryexceptnode.pass_generate_code;
               InternalError(2011103101);
             { TODO: make it done without using global label }
             current_asmdata.getglobaljumplabel(onlabel);
-            hlist.concat(tai_const.create_rva_sym(current_asmdata.RefAsmSymbol(tonnode(hnode).excepttype.vmt_mangledname)));
+            hlist.concat(tai_const.create_rva_sym(current_asmdata.RefAsmSymbol(tonnode(hnode).excepttype.vmt_mangledname,AT_DATA)));
             hlist.concat(tai_const.create_rva_sym(onlabel));
             cg.a_label(current_asmdata.CurrAsmList,onlabel);
             secondpass(hnode);

+ 22 - 108
compiler/x86_64/nx64mat.pas

@@ -29,10 +29,6 @@ interface
       node,nmat,ncgmat,nx86mat;
 
     type
-      tx8664moddivnode = class(tmoddivnode)
-         procedure pass_generate_code;override;
-      end;
-
       tx8664shlshrnode = class(tshlshrnode)
          procedure pass_generate_code;override;
       end;
@@ -55,96 +51,6 @@ implementation
       cgbase,cgutils,cga,cgobj,hlcgobj,cgx86,
       ncgutil;
 
-{*****************************************************************************
-                             TX8664MODDIVNODE
-*****************************************************************************}
-
-    procedure tx8664moddivnode.pass_generate_code;
-      var
-        hreg1,hreg2:Tregister;
-        power:longint;
-        op:Tasmop;
-      begin
-        secondpass(left);
-        if codegenerror then
-          exit;
-        secondpass(right);
-        if codegenerror then
-          exit;
-
-        { put numerator in register }
-        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
-        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
-        hreg1:=left.location.register;
-
-        if (nodetype=divn) and (right.nodetype=ordconstn) and
-           ispowerof2(int64(tordconstnode(right).value),power) then
-          begin
-            { for signed numbers, the numerator must be adjusted before the
-              shift instruction, but not wih unsigned numbers! Otherwise,
-              "Cardinal($ffffffff) div 16" overflows! (JM) }
-            if is_signed(left.resultdef) Then
-              begin
-                  { use a sequence without jumps, saw this in
-                    comp.compilers (JM) }
-                  { no jumps, but more operations }
-                  hreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-                  emit_reg_reg(A_MOV,S_Q,hreg1,hreg2);
-                  {If the left value is signed, hreg2=$ffffffff, otherwise 0.}
-                  emit_const_reg(A_SAR,S_Q,63,hreg2);
-                  {If signed, hreg2=right value-1, otherwise 0.}
-                  { (don't use emit_const_reg, because if value>high(longint)
-                     then it must first be loaded into a register) }
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_S64,tordconstnode(right).value-1,hreg2);
-                  { add to the left value }
-                  emit_reg_reg(A_ADD,S_Q,hreg2,hreg1);
-                  { do the shift }
-                  emit_const_reg(A_SAR,S_Q,power,hreg1);
-              end
-            else
-              emit_const_reg(A_SHR,S_Q,power,hreg1);
-            location.register:=hreg1;
-          end
-        else
-          begin
-            {Bring denominator to a register.}
-            cg.getcpuregister(current_asmdata.CurrAsmList,NR_RAX);
-            emit_reg_reg(A_MOV,S_Q,hreg1,NR_RAX);
-            cg.getcpuregister(current_asmdata.CurrAsmList,NR_RDX);
-            {Sign extension depends on the left type.}
-            if torddef(left.resultdef).ordtype=u64bit then
-              emit_reg_reg(A_XOR,S_Q,NR_RDX,NR_RDX)
-            else
-              emit_none(A_CQO,S_NO);
-
-            {Division depends on the right type.}
-            if Torddef(right.resultdef).ordtype=u64bit then
-              op:=A_DIV
-            else
-              op:=A_IDIV;
-
-            if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
-              emit_ref(op,S_Q,right.location.reference)
-            else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
-              emit_reg(op,S_Q,right.location.register)
-            else
-              begin
-                hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size);
-                hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,u64inttype,right.location,hreg1);
-                emit_reg(op,S_Q,hreg1);
-              end;
-
-            { Copy the result into a new register. Release RAX & RDX.}
-            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RDX);
-            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RAX);
-            location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-            if nodetype=divn then
-              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_RAX,location.register)
-            else
-              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_RDX,location.register);
-          end;
-      end;
-
 
 {*****************************************************************************
                              TX8664SHLRSHRNODE
@@ -153,7 +59,7 @@ implementation
 
     procedure tx8664shlshrnode.pass_generate_code;
       var
-        op : Tasmop;
+        op : topcg;
         opsize : tcgsize;
         mask : aint;
       begin
@@ -162,9 +68,9 @@ implementation
 
         { determine operator }
         if nodetype=shln then
-          op:=A_SHL
+          op:=OP_SHL
         else
-          op:=A_SHR;
+          op:=OP_SHR;
 
         { special treatment of 32bit values for backwards compatibility }
         { mul optimizations require to keep the sign (FK) }
@@ -186,28 +92,36 @@ implementation
           end;
 
         { load left operators in a register }
-        location_copy(location,left.location);
-        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,left.resultdef,cgsize_orddef(opsize),false);
+        if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER]) or
+          { location_force_reg can be also used to change the size of a register }
+          (left.location.size<>opsize) then
+          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);
+        location_reset(location,LOC_REGISTER,opsize);
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
 
         { shifting by a constant directly coded: }
         if (right.nodetype=ordconstn) then
-          emit_const_reg(op,tcgsize2opsize[opsize],tordconstnode(right).value and mask,location.register)
+          cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,op,location.size,
+            tordconstnode(right).value.uvalue and mask,left.location.register,location.register)
         else
           begin
-            { load right operators in a RCX }
-            cg.getcpuregister(current_asmdata.CurrAsmList,NR_RCX);
-            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_RCX);
-
-            { right operand is in ECX }
-            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RCX);
-            emit_reg_reg(op,tcgsize2opsize[opsize],NR_CL,location.register);
+            { load right operators in a register - this
+              is done since most target cpu which will use this
+              node do not support a shift count in a mem. location (cec)
+            }
+            if not(right.location.loc in [LOC_CREGISTER,LOC_REGISTER]) or
+               { location_force_reg can be also used to change the size of a register }
+              (right.location.size<>opsize) then
+              hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,cgsize_orddef(opsize),true);
+
+            cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,op,opsize,right.location.register,left.location.register,location.register);
           end;
       end;
 
 
 begin
    cunaryminusnode:=tx8664unaryminusnode;
-   cmoddivnode:=tx8664moddivnode;
+   cmoddivnode:=tx86moddivnode;
    cshlshrnode:=tx8664shlshrnode;
    cnotnode:=tx8664notnode;
 end.

+ 111 - 2
compiler/x86_64/nx64set.pas

@@ -1,7 +1,7 @@
 {
     Copyright (c) 1998-2002 by Florian Klaempfl
 
-    Generate i386 assembler for in set/case nodes
+    Generate x86_64 assembler for in set/case nodes
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -32,13 +32,26 @@ interface
     type
       tx8664casenode = class(tx86casenode)
          procedure optimizevalues(var max_linear_list:aint;var max_dist:aword);override;
+         procedure genjumptable(hp : pcaselabel;min_,max_ : aint);override;
       end;
 
 
 implementation
 
+    uses
+      systems,
+      verbose,globals,constexp,
+      symconst,symdef,defutil,
+      aasmbase,aasmtai,aasmdata,aasmcpu,
+      cgbase,pass_2,
+      ncon,
+      cpubase,cpuinfo,procinfo,
+      cga,cgutils,cgobj,ncgutil,
+      cgx86;
+
+
 {*****************************************************************************
-                            TI386CASENODE
+                            TX8664CASENODE
 *****************************************************************************}
 
     procedure tx8664casenode.optimizevalues(var max_linear_list:aint;var max_dist:aword);
@@ -46,6 +59,102 @@ implementation
         inc(max_linear_list,9);
       end;
 
+
+    { Always generate position-independent jump table, it is twice less in size at a price
+      of two extra instructions (which shouldn't cause more slowdown than pipeline trashing) }
+    procedure tx8664casenode.genjumptable(hp : pcaselabel; min_,max_ : aint);
+      var
+        last: TConstExprInt;
+        tablelabel: TAsmLabel;
+        basereg,indexreg,jumpreg: TRegister;
+        href: TReference;
+        opcgsize: tcgsize;
+        sectype: TAsmSectiontype;
+        jtitemconsttype: taiconst_type;
+
+      procedure genitem(list:TAsmList;t : pcaselabel);
+        var
+          i : aint;
+        begin
+          if assigned(t^.less) then
+            genitem(list,t^.less);
+          { fill possible hole }
+          i:=last.svalue+1;
+          while i<=t^._low.svalue-1 do
+            begin
+              list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,elselabel));
+              inc(i);
+            end;
+          i:=t^._low.svalue;
+          while i<=t^._high.svalue do
+            begin
+              list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,blocklabel(t^.blockid)));
+              inc(i);
+            end;
+          last:=t^._high;
+          if assigned(t^.greater) then
+            genitem(list,t^.greater);
+        end;
+
+      begin
+        if not(target_info.system in systems_darwin) then
+          jtitemconsttype:=aitconst_32bit
+        else
+          { see https://gmplib.org/list-archives/gmp-bugs/2012-December/002836.html }
+          jtitemconsttype:=aitconst_darwin_dwarf_delta32;
+
+        last:=min_;
+        opcgsize:=def_cgsize(opsize);
+        if not(jumptable_no_range) then
+          begin
+             { a <= x <= b <-> unsigned(x-a) <= (b-a) }
+             cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SUB,opcgsize,aint(min_),hregister);
+             { case expr greater than max_ => goto elselabel }
+             cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,OC_A,aint(max_)-aint(min_),hregister,elselabel);
+             min_:=0;
+          end;
+        { local label in order to avoid using GOT }
+        current_asmdata.getlabel(tablelabel,alt_data);
+        indexreg:=cg.makeregsize(current_asmdata.CurrAsmList,hregister,OS_ADDR);
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList,opcgsize,OS_ADDR,hregister,indexreg);
+        { load table address }
+        reference_reset_symbol(href,tablelabel,0,4);
+        basereg:=cg.getaddressregister(current_asmdata.CurrAsmList);
+        cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,basereg);
+        { load table slot, 32-bit sign extended }
+        reference_reset_base(href,basereg,-aint(min_)*4,4);
+        href.index:=indexreg;
+        href.scalefactor:=4;
+        jumpreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
+        cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_S32,OS_ADDR,href,jumpreg);
+        { add table address }
+        reference_reset_base(href,basereg,0,sizeof(pint));
+        href.index:=jumpreg;
+        href.scalefactor:=1;
+        cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,jumpreg);
+        { and finally jump }
+        emit_reg(A_JMP,S_NO,jumpreg);
+        { generate jump table }
+        if not(target_info.system in systems_darwin) then
+          sectype:=sec_rodata
+        else
+          { on Mac OS X, dead code stripping ("smart linking") happens based on
+            global symbols: every global/static symbol (symbols that do not
+            start with "L") marks the start of a new "subsection" that is
+            discarded by the linker if there are no references to this symbol.
+            This means that if you put the jump table in the rodata section, it
+            will become part of the block of data associated with the previous
+            non-L-label in the rodata section and stay or be thrown away
+            depending on whether that block of data is referenced. Therefore,
+            jump tables must be added in the code section and since aktlocaldata
+            is inserted right after the routine, it will become part of the
+            same subsection that contains the routine's code }
+          sectype:=sec_code;
+        new_section(current_procinfo.aktlocaldata,sectype,current_procinfo.procdef.mangledname,4);
+        current_procinfo.aktlocaldata.concat(Tai_label.Create(tablelabel));
+        genitem(current_procinfo.aktlocaldata,hp);
+      end;
+
 begin
    ccasenode:=tx8664casenode;
 end.

+ 10 - 10
compiler/x86_64/r8664ari.inc

@@ -9,22 +9,22 @@
 18,
 7,
 6,
-83,
 84,
 85,
 86,
-71,
+87,
+70,
 8,
 12,
 26,
 25,
 11,
-77,
 78,
 79,
 80,
 81,
 82,
+83,
 72,
 13,
 4,
@@ -33,13 +33,13 @@
 9,
 27,
 14,
-70,
-73,
+77,
+69,
 23,
 35,
-92,
 75,
-76,
+73,
+74,
 102,
 103,
 104,
@@ -86,14 +86,14 @@
 10,
 28,
 15,
-69,
+76,
 24,
 36,
 22,
 21,
 34,
 33,
-74,
+71,
 101,
 93,
 94,
@@ -103,11 +103,11 @@
 98,
 99,
 100,
-87,
 88,
 89,
 90,
 91,
+92,
 110,
 111,
 120,

+ 5 - 5
compiler/x86_64/r8664att.inc

@@ -68,14 +68,15 @@
 '%r15b',
 '%r15w',
 '%r15d',
-'%rip',
-'%eip',
-'%cs',
-'%ds',
 '%es',
+'%cs',
 '%ss',
+'%ds',
 '%fs',
 '%gs',
+'%flags',
+'%rip',
+'%eip',
 '%dr0',
 '%dr1',
 '%dr2',
@@ -91,7 +92,6 @@
 '%tr5',
 '%tr6',
 '%tr7',
-'%flags',
 '%st(0)',
 '%st(1)',
 '%st(2)',

+ 23 - 23
compiler/x86_64/r8664con.inc

@@ -68,30 +68,30 @@ NR_R15 = tregister($0105000f);
 NR_R15L = tregister($0101000f);
 NR_R15W = tregister($0103000f);
 NR_R15D = tregister($0104000f);
-NR_RIP = tregister($05050000);
-NR_EIP = tregister($05040000);
+NR_ES = tregister($05000000);
 NR_CS = tregister($05000001);
-NR_DS = tregister($05000002);
-NR_ES = tregister($05000003);
-NR_SS = tregister($05000004);
-NR_FS = tregister($05000005);
-NR_GS = tregister($05000006);
-NR_DR0 = tregister($05000007);
-NR_DR1 = tregister($05000008);
-NR_DR2 = tregister($05000009);
-NR_DR3 = tregister($0500000a);
-NR_DR6 = tregister($0500000b);
-NR_DR7 = tregister($0500000c);
-NR_CR0 = tregister($0500000d);
-NR_CR2 = tregister($0500000e);
-NR_CR3 = tregister($0500000f);
-NR_CR4 = tregister($05000010);
-NR_TR3 = tregister($05000011);
-NR_TR4 = tregister($05000012);
-NR_TR5 = tregister($05000013);
-NR_TR6 = tregister($05000014);
-NR_TR7 = tregister($05000015);
-NR_FLAGS = tregister($05000016);
+NR_SS = tregister($05000002);
+NR_DS = tregister($05000003);
+NR_FS = tregister($05000004);
+NR_GS = tregister($05000005);
+NR_FLAGS = tregister($05000006);
+NR_RIP = tregister($05050007);
+NR_EIP = tregister($05040007);
+NR_DR0 = tregister($05000008);
+NR_DR1 = tregister($05000009);
+NR_DR2 = tregister($0500000a);
+NR_DR3 = tregister($0500000b);
+NR_DR6 = tregister($0500000d);
+NR_DR7 = tregister($0500000e);
+NR_CR0 = tregister($05000010);
+NR_CR2 = tregister($05000012);
+NR_CR3 = tregister($05000013);
+NR_CR4 = tregister($05000014);
+NR_TR3 = tregister($0500001b);
+NR_TR4 = tregister($0500001c);
+NR_TR5 = tregister($0500001d);
+NR_TR6 = tregister($0500001e);
+NR_TR7 = tregister($0500001f);
 NR_ST0 = tregister($02000000);
 NR_ST1 = tregister($02000001);
 NR_ST2 = tregister($02000002);

+ 2 - 2
compiler/x86_64/r8664dwrf.inc

@@ -68,8 +68,6 @@
 15,
 15,
 15,
-16,
-16,
 -1,
 -1,
 -1,
@@ -77,6 +75,8 @@
 -1,
 -1,
 -1,
+16,
+16,
 -1,
 -1,
 -1,

+ 5 - 5
compiler/x86_64/r8664int.inc

@@ -68,14 +68,15 @@
 'r15b',
 'r15w',
 'r15d',
-'rip',
-'eip',
-'cs',
-'ds',
 'es',
+'cs',
 'ss',
+'ds',
 'fs',
 'gs',
+'flags',
+'rip',
+'eip',
 'dr0',
 'dr1',
 'dr2',
@@ -91,7 +92,6 @@
 'tr5',
 'tr6',
 'tr7',
-'flags',
 'st(0)',
 'st(1)',
 'st(2)',

+ 10 - 10
compiler/x86_64/r8664iri.inc

@@ -10,22 +10,22 @@
 18,
 7,
 6,
-83,
 84,
 85,
 86,
-71,
+87,
+70,
 8,
 12,
 26,
 25,
 11,
-77,
 78,
 79,
 80,
 81,
 82,
+83,
 72,
 13,
 4,
@@ -34,13 +34,13 @@
 9,
 27,
 14,
-70,
-73,
+77,
+69,
 23,
 35,
-92,
 75,
-76,
+73,
+74,
 102,
 103,
 104,
@@ -87,14 +87,14 @@
 10,
 28,
 15,
-69,
+76,
 24,
 36,
 22,
 21,
 34,
 33,
-74,
+71,
 101,
 93,
 94,
@@ -104,11 +104,11 @@
 98,
 99,
 100,
-87,
 88,
 89,
 90,
 91,
+92,
 110,
 111,
 120,

+ 8 - 8
compiler/x86_64/r8664num.inc

@@ -68,30 +68,30 @@ tregister($0105000f),
 tregister($0101000f),
 tregister($0103000f),
 tregister($0104000f),
-tregister($05050000),
-tregister($05040000),
+tregister($05000000),
 tregister($05000001),
 tregister($05000002),
 tregister($05000003),
 tregister($05000004),
 tregister($05000005),
 tregister($05000006),
-tregister($05000007),
+tregister($05050007),
+tregister($05040007),
 tregister($05000008),
 tregister($05000009),
 tregister($0500000a),
 tregister($0500000b),
-tregister($0500000c),
 tregister($0500000d),
 tregister($0500000e),
-tregister($0500000f),
 tregister($05000010),
-tregister($05000011),
 tregister($05000012),
 tregister($05000013),
 tregister($05000014),
-tregister($05000015),
-tregister($05000016),
+tregister($0500001b),
+tregister($0500001c),
+tregister($0500001d),
+tregister($0500001e),
+tregister($0500001f),
 tregister($02000000),
 tregister($02000001),
 tregister($02000002),

+ 0 - 143
compiler/x86_64/r8664op.inc

@@ -1,143 +0,0 @@
-{ don't edit, this file is generated from x86reg.dat }
-0,
-0,
-4,
-0,
-0,
-0,
-1,
-5,
-1,
-1,
-1,
-2,
-6,
-2,
-2,
-2,
-3,
-7,
-3,
-3,
-3,
-6,
-6,
-6,
-6,
-7,
-7,
-7,
-7,
-5,
-5,
-5,
-5,
-4,
-4,
-4,
-4,
-0,
-0,
-0,
-0,
-1,
-1,
-1,
-1,
-2,
-2,
-2,
-2,
-3,
-3,
-3,
-3,
-4,
-4,
-4,
-4,
-5,
-5,
-5,
-5,
-6,
-6,
-6,
-6,
-7,
-7,
-7,
-7,
-0,
-0,
-1,
-3,
-0,
-2,
-4,
-5,
-0,
-1,
-2,
-3,
-6,
-7,
-0,
-2,
-3,
-4,
-3,
-4,
-5,
-6,
-7,
-0,
-0,
-1,
-2,
-3,
-4,
-5,
-6,
-7,
-0,
-0,
-1,
-2,
-3,
-4,
-5,
-6,
-7,
-0,
-1,
-2,
-3,
-4,
-5,
-6,
-7,
-0,
-1,
-2,
-3,
-4,
-5,
-6,
-7,
-0,
-1,
-2,
-3,
-4,
-5,
-6,
-7,
-0,
-1,
-2,
-3,
-4,
-5,
-6,
-7

+ 4 - 4
compiler/x86_64/r8664ot.inc

@@ -68,14 +68,15 @@ OT_REG64,
 OT_REG8,
 OT_REG16,
 OT_REG32,
-OT_NONE,
-OT_NONE,
-OT_REG_CS,
 OT_REG_DESS,
+OT_REG_CS,
 OT_REG_DESS,
 OT_REG_DESS,
 OT_REG_FSGS,
 OT_REG_FSGS,
+OT_NONE,
+OT_NONE,
+OT_NONE,
 OT_REG_DREG,
 OT_REG_DREG,
 OT_REG_DREG,
@@ -91,7 +92,6 @@ OT_REG_TREG,
 OT_REG_TREG,
 OT_REG_TREG,
 OT_REG_TREG,
-OT_NONE,
 OT_FPU0,
 OT_FPUREG,
 OT_FPUREG,

+ 4 - 4
compiler/x86_64/r8664rni.inc

@@ -117,13 +117,13 @@
 139,
 140,
 141,
+69,
+70,
 71,
 72,
 73,
 74,
 75,
-76,
-77,
 78,
 79,
 80,
@@ -139,5 +139,5 @@
 90,
 91,
 92,
-70,
-69
+77,
+76

+ 10 - 10
compiler/x86_64/r8664sri.inc

@@ -10,22 +10,22 @@
 18,
 7,
 6,
-83,
 84,
 85,
 86,
-71,
+87,
+70,
 8,
 12,
 26,
 25,
 11,
-77,
 78,
 79,
 80,
 81,
 82,
+83,
 72,
 13,
 4,
@@ -34,13 +34,13 @@
 9,
 27,
 14,
-70,
-73,
+77,
+69,
 23,
 35,
-92,
 75,
-76,
+73,
+74,
 102,
 103,
 104,
@@ -87,14 +87,14 @@
 10,
 28,
 15,
-69,
+76,
 24,
 36,
 22,
 21,
 34,
 33,
-74,
+71,
 101,
 93,
 94,
@@ -104,11 +104,11 @@
 98,
 99,
 100,
-87,
 88,
 89,
 90,
 91,
+92,
 110,
 111,
 120,

+ 2 - 2
compiler/x86_64/r8664stab.inc

@@ -68,8 +68,6 @@
 15,
 15,
 15,
-16,
-16,
 -1,
 -1,
 -1,
@@ -77,6 +75,8 @@
 -1,
 -1,
 -1,
+16,
+16,
 -1,
 -1,
 -1,

+ 5 - 5
compiler/x86_64/r8664std.inc

@@ -68,14 +68,15 @@
 'r15b',
 'r15w',
 'r15d',
-'rip',
-'eip',
-'cs',
-'ds',
 'es',
+'cs',
 'ss',
+'ds',
 'fs',
 'gs',
+'flags',
+'rip',
+'eip',
 'dr0',
 'dr1',
 'dr2',
@@ -91,7 +92,6 @@
 'tr5',
 'tr6',
 'tr7',
-'flags',
 'st(0)',
 'st(1)',
 'st(2)',

+ 1 - 1
compiler/x86_64/rgcpu.pas

@@ -47,7 +47,7 @@ unit rgcpu;
         supreg:=getsupreg(reg);
         { All registers conflict with rsp/rbp }
         add_edge(supreg,RS_RSP);
-        add_edge(supreg,RS_RBP);
+        // add_edge(supreg,RS_RBP);
       end;
 
 end.

+ 211 - 0
compiler/x86_64/symcpu.pas

@@ -0,0 +1,211 @@
+{
+    Copyright (c) 2014 by Florian Klaempfl
+
+    Symbol table overrides for x86_64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit symcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  symtype,symdef,symsym,symx86;
+
+type
+  { defs }
+  tcpufiledef = class(tfiledef)
+  end;
+  tcpufiledefclass = class of tcpufiledef;
+
+  tcpuvariantdef = class(tvariantdef)
+  end;
+  tcpuvariantdefclass = class of tcpuvariantdef;
+
+  tcpuformaldef = class(tformaldef)
+  end;
+  tcpuformaldefclass = class of tcpuformaldef;
+
+  tcpuforwarddef = class(tforwarddef)
+  end;
+  tcpuforwarddefclass = class of tcpuforwarddef;
+
+  tcpuundefineddef = class(tundefineddef)
+  end;
+  tcpuundefineddefclass = class of tcpuundefineddef;
+
+  tcpuerrordef = class(terrordef)
+  end;
+  tcpuerrordefclass = class of tcpuerrordef;
+
+  tcpupointerdef = class(tx86pointerdef)
+  end;
+  tcpupointerdefclass = class of tcpupointerdef;
+
+  tcpurecorddef = class(trecorddef)
+  end;
+  tcpurecorddefclass = class of tcpurecorddef;
+
+  tcpuimplementedinterface = class(timplementedinterface)
+  end;
+  tcpuimplementedinterfaceclass = class of tcpuimplementedinterface;
+
+  tcpuobjectdef = class(tobjectdef)
+  end;
+  tcpuobjectdefclass = class of tcpuobjectdef;
+
+  tcpuclassrefdef = class(tclassrefdef)
+  end;
+  tcpuclassrefdefclass = class of tcpuclassrefdef;
+
+  tcpuarraydef = class(tarraydef)
+  end;
+  tcpuarraydefclass = class of tcpuarraydef;
+
+  tcpuorddef = class(torddef)
+  end;
+  tcpuorddefclass = class of tcpuorddef;
+
+  tcpufloatdef = class(tfloatdef)
+  end;
+  tcpufloatdefclass = class of tcpufloatdef;
+
+  tcpuprocvardef = class(tprocvardef)
+  end;
+  tcpuprocvardefclass = class of tcpuprocvardef;
+
+  tcpuprocdef = class(tprocdef)
+  end;
+  tcpuprocdefclass = class of tcpuprocdef;
+
+  tcpustringdef = class(tstringdef)
+  end;
+  tcpustringdefclass = class of tcpustringdef;
+
+  tcpuenumdef = class(tenumdef)
+  end;
+  tcpuenumdefclass = class of tcpuenumdef;
+
+  tcpusetdef = class(tsetdef)
+  end;
+  tcpusetdefclass = class of tcpusetdef;
+
+  { syms }
+  tcpulabelsym = class(tlabelsym)
+  end;
+  tcpulabelsymclass = class of tcpulabelsym;
+
+  tcpuunitsym = class(tunitsym)
+  end;
+  tcpuunitsymclass = class of tcpuunitsym;
+
+  tcpunamespacesym = class(tnamespacesym)
+  end;
+  tcpunamespacesymclass = class of tcpunamespacesym;
+
+  tcpuprocsym = class(tprocsym)
+  end;
+  tcpuprocsymclass = class of tcpuprocsym;
+
+  tcputypesym = class(ttypesym)
+  end;
+  tcpuypesymclass = class of tcputypesym;
+
+  tcpufieldvarsym = class(tfieldvarsym)
+  end;
+  tcpufieldvarsymclass = class of tcpufieldvarsym;
+
+  tcpulocalvarsym = class(tlocalvarsym)
+  end;
+  tcpulocalvarsymclass = class of tcpulocalvarsym;
+
+  tcpuparavarsym = class(tparavarsym)
+  end;
+  tcpuparavarsymclass = class of tcpuparavarsym;
+
+  tcpustaticvarsym = class(tstaticvarsym)
+  end;
+  tcpustaticvarsymclass = class of tcpustaticvarsym;
+
+  tcpuabsolutevarsym = class(tabsolutevarsym)
+  end;
+  tcpuabsolutevarsymclass = class of tcpuabsolutevarsym;
+
+  tcpupropertysym = class(tpropertysym)
+  end;
+  tcpupropertysymclass = class of tcpupropertysym;
+
+  tcpuconstsym = class(tconstsym)
+  end;
+  tcpuconstsymclass = class of tcpuconstsym;
+
+  tcpuenumsym = class(tenumsym)
+  end;
+  tcpuenumsymclass = class of tcpuenumsym;
+
+  tcpusyssym = class(tsyssym)
+  end;
+  tcpusyssymclass = class of tcpusyssym;
+
+
+const
+  pbestrealtype : ^tdef = @s80floattype;
+
+
+implementation
+
+begin
+  { used tdef classes }
+  cfiledef:=tcpufiledef;
+  cvariantdef:=tcpuvariantdef;
+  cformaldef:=tcpuformaldef;
+  cforwarddef:=tcpuforwarddef;
+  cundefineddef:=tcpuundefineddef;
+  cerrordef:=tcpuerrordef;
+  cpointerdef:=tcpupointerdef;
+  crecorddef:=tcpurecorddef;
+  cimplementedinterface:=tcpuimplementedinterface;
+  cobjectdef:=tcpuobjectdef;
+  cclassrefdef:=tcpuclassrefdef;
+  carraydef:=tcpuarraydef;
+  corddef:=tcpuorddef;
+  cfloatdef:=tcpufloatdef;
+  cprocvardef:=tcpuprocvardef;
+  cprocdef:=tcpuprocdef;
+  cstringdef:=tcpustringdef;
+  cenumdef:=tcpuenumdef;
+  csetdef:=tcpusetdef;
+
+  { used tsym classes }
+  clabelsym:=tcpulabelsym;
+  cunitsym:=tcpuunitsym;
+  cnamespacesym:=tcpunamespacesym;
+  cprocsym:=tcpuprocsym;
+  ctypesym:=tcputypesym;
+  cfieldvarsym:=tcpufieldvarsym;
+  clocalvarsym:=tcpulocalvarsym;
+  cparavarsym:=tcpuparavarsym;
+  cstaticvarsym:=tcpustaticvarsym;
+  cabsolutevarsym:=tcpuabsolutevarsym;
+  cpropertysym:=tcpupropertysym;
+  cconstsym:=tcpuconstsym;
+  cenumsym:=tcpuenumsym;
+  csyssym:=tcpusyssym;
+end.
+

+ 70 - 21
compiler/x86_64/x8664ats.inc

@@ -1,12 +1,6 @@
 { don't edit, this file is generated from x86ins.dat }
 (
 attsufNONE,
-attsufNONE,
-attsufINT,
-attsufINT,
-attsufNONE,
-attsufINT,
-attsufINT,
 attsufINT,
 attsufINT,
 attsufINT,
@@ -35,8 +29,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
-attsufNONE,
-attsufNONE,
 attsufINT,
 attsufINT,
 attsufNONE,
@@ -156,7 +148,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
-attsufNONE,
 attsufINT,
 attsufNONE,
 attsufNONE,
@@ -164,19 +155,16 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
-attsufNONE,
 attsufINT,
 attsufNONE,
 attsufINT,
 attsufNONE,
 attsufINT,
-attsufINT,
 attsufNONE,
 attsufINT,
 attsufINT,
 attsufINT,
 attsufINT,
-attsufINT,
 attsufNONE,
 attsufINT,
 attsufINT,
@@ -266,10 +254,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufINT,
-attsufINT,
-attsufNONE,
-attsufNONE,
-attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
@@ -300,10 +284,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufINT,
-attsufINT,
-attsufNONE,
-attsufNONE,
-attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
@@ -329,7 +309,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufINT,
-attsufNONE,
 attsufINT,
 attsufINT,
 attsufNONE,
@@ -969,5 +948,75 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 71 - 22
compiler/x86_64/x8664att.inc

@@ -1,15 +1,9 @@
 { don't edit, this file is generated from x86ins.dat }
 (
 'none',
-'aaa',
-'aad',
-'aam',
-'aas',
 'adc',
 'add',
 'and',
-'arpl',
-'bound',
 'bsf',
 'bsr',
 'bswap',
@@ -35,8 +29,6 @@
 'cpuid',
 'cwd',
 'cwtl',
-'daa',
-'das',
 'dec',
 'div',
 'emms',
@@ -155,24 +147,20 @@
 'int1',
 'int03',
 'int3',
-'into',
 'invd',
 'invlpg',
 'iret',
 'iret',
 'iretw',
 'iretq',
-'jcxz',
 'jecxz',
 'jrcxz',
 'jmp',
 'lahf',
 'lar',
 'lcall',
-'lds',
 'lea',
 'leave',
-'les',
 'lfs',
 'lgdt',
 'lgs',
@@ -266,11 +254,7 @@
 'pmvnzb',
 'pmvzb',
 'pop',
-'popa',
-'popal',
-'popaw',
 'popf',
-'popfl',
 'popfw',
 'popfq',
 'por',
@@ -300,11 +284,7 @@
 'punpckldq',
 'punpcklwd',
 'push',
-'pusha',
-'pushal',
-'pushaw',
 'pushf',
-'pushfl',
 'pushfw',
 'pushfq',
 'pxor',
@@ -329,7 +309,6 @@
 'rsm',
 'sahf',
 'sal',
-'salc',
 'sar',
 'sbb',
 'scasb',
@@ -946,6 +925,7 @@
 'vzeroupper',
 'andn',
 'bextr',
+'tzcnt',
 'rorx',
 'sarx',
 'shlx',
@@ -969,5 +949,74 @@
 'vpsllvq',
 'vpsravd',
 'vpsrlvd',
-'vpsrlvq'
+'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
+'vfmadd132pd',
+'vfmadd213pd',
+'vfmadd231pd',
+'vfmaddpd',
+'vfmadd132ps',
+'vfmadd213ps',
+'vfmadd231ps',
+'vfmadd132sd',
+'vfmadd213sd',
+'vfmadd231sd',
+'vfmadd132ss',
+'vfmadd213ss',
+'vfmadd231ss',
+'vfmaddsub132pd',
+'vfmaddsub213pd',
+'vfmaddsub231pd',
+'vfmaddsub132ps',
+'vfmaddsub213ps',
+'vfmaddsub231ps',
+'vfmsubadd132pd',
+'vfmsubadd213pd',
+'vfmsubadd231pd',
+'vfmsubadd132ps',
+'vfmsubadd213ps',
+'vfmsubadd231ps',
+'vfmsub132pd',
+'vfmsub213pd',
+'vfmsub231pd',
+'vfmsub132ps',
+'vfmsub213ps',
+'vfmsub231ps',
+'vfmsub132sd',
+'vfmsub213sd',
+'vfmsub231sd',
+'vfmsub132ss',
+'vfmsub213ss',
+'vfmsub231ss',
+'vfnmadd132pd',
+'vfnmadd213pd',
+'vfnmadd231pd',
+'vfnmadd132ps',
+'vfnmadd213ps',
+'vfnmadd231ps',
+'vfnmadd132sd',
+'vfnmadd213sd',
+'vfnmadd231sd',
+'vfnmadd132ss',
+'vfnmadd213ss',
+'vfnmadd231ss',
+'vfnmsub132pd',
+'vfnmsub213pd',
+'vfnmsub231pd',
+'vfnmsub132ps',
+'vfnmsub213ps',
+'vfnmsub231ps',
+'vfnmsub132sd',
+'vfnmsub213sd',
+'vfnmsub231sd',
+'vfnmsub132ss',
+'vfnmsub213ss',
+'vfnmsub231ss'
 );

+ 71 - 22
compiler/x86_64/x8664int.inc

@@ -1,15 +1,9 @@
 { don't edit, this file is generated from x86ins.dat }
 (
 'none',
-'aaa',
-'aad',
-'aam',
-'aas',
 'adc',
 'add',
 'and',
-'arpl',
-'bound',
 'bsf',
 'bsr',
 'bswap',
@@ -35,8 +29,6 @@
 'cpuid',
 'cwd',
 'cwde',
-'daa',
-'das',
 'dec',
 'div',
 'emms',
@@ -155,24 +147,20 @@
 'int1',
 'int03',
 'int3',
-'into',
 'invd',
 'invlpg',
 'iret',
 'iretd',
 'iretw',
 'iretq',
-'jcxz',
 'jecxz',
 'jrcxz',
 'jmp',
 'lahf',
 'lar',
 'lcall',
-'lds',
 'lea',
 'leave',
-'les',
 'lfs',
 'lgdt',
 'lgs',
@@ -266,11 +254,7 @@
 'pmvnzb',
 'pmvzb',
 'pop',
-'popa',
-'popad',
-'popaw',
 'popf',
-'popfd',
 'popfw',
 'popfq',
 'por',
@@ -300,11 +284,7 @@
 'punpckldq',
 'punpcklwd',
 'push',
-'pusha',
-'pushad',
-'pushaw',
 'pushf',
-'pushfd',
 'pushfw',
 'pushfq',
 'pxor',
@@ -329,7 +309,6 @@
 'rsm',
 'sahf',
 'sal',
-'salc',
 'sar',
 'sbb',
 'scasb',
@@ -946,6 +925,7 @@
 'vzeroupper',
 'andn',
 'bextr',
+'tzcnt',
 'rorx',
 'sarx',
 'shlx',
@@ -969,5 +949,74 @@
 'vpsllvq',
 'vpsravd',
 'vpsrlvd',
-'vpsrlvq'
+'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
+'vfmadd132pd',
+'vfmadd213pd',
+'vfmadd231pd',
+'vfmaddpd',
+'vfmadd132ps',
+'vfmadd213ps',
+'vfmadd231ps',
+'vfmadd132sd',
+'vfmadd213sd',
+'vfmadd231sd',
+'vfmadd132ss',
+'vfmadd213ss',
+'vfmadd231ss',
+'vfmaddsub132pd',
+'vfmaddsub213pd',
+'vfmaddsub231pd',
+'vfmaddsub132ps',
+'vfmaddsub213ps',
+'vfmaddsub231ps',
+'vfmsubadd132pd',
+'vfmsubadd213pd',
+'vfmsubadd231pd',
+'vfmsubadd132ps',
+'vfmsubadd213ps',
+'vfmsubadd231ps',
+'vfmsub132pd',
+'vfmsub213pd',
+'vfmsub231pd',
+'vfmsub132ps',
+'vfmsub213ps',
+'vfmsub231ps',
+'vfmsub132sd',
+'vfmsub213sd',
+'vfmsub231sd',
+'vfmsub132ss',
+'vfmsub213ss',
+'vfmsub231ss',
+'vfnmadd132pd',
+'vfnmadd213pd',
+'vfnmadd231pd',
+'vfnmadd132ps',
+'vfnmadd213ps',
+'vfnmadd231ps',
+'vfnmadd132sd',
+'vfnmadd213sd',
+'vfnmadd231sd',
+'vfnmadd132ss',
+'vfnmadd213ss',
+'vfnmadd231ss',
+'vfnmsub132pd',
+'vfnmsub213pd',
+'vfnmsub231pd',
+'vfnmsub132ps',
+'vfnmsub213ps',
+'vfnmsub231ps',
+'vfnmsub132sd',
+'vfnmsub213sd',
+'vfnmsub231sd',
+'vfnmsub132ss',
+'vfnmsub213ss',
+'vfnmsub231ss'
 );

+ 1 - 1
compiler/x86_64/x8664nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-1849;
+1963;

+ 71 - 22
compiler/x86_64/x8664op.inc

@@ -1,15 +1,9 @@
 { don't edit, this file is generated from x86ins.dat }
 (
 A_NONE,
-A_AAA,
-A_AAD,
-A_AAM,
-A_AAS,
 A_ADC,
 A_ADD,
 A_AND,
-A_ARPL,
-A_BOUND,
 A_BSF,
 A_BSR,
 A_BSWAP,
@@ -35,8 +29,6 @@ A_CMPXCHG8B,
 A_CPUID,
 A_CWD,
 A_CWDE,
-A_DAA,
-A_DAS,
 A_DEC,
 A_DIV,
 A_EMMS,
@@ -155,24 +147,20 @@ A_INT01,
 A_INT1,
 A_INT03,
 A_INT3,
-A_INTO,
 A_INVD,
 A_INVLPG,
 A_IRET,
 A_IRETD,
 A_IRETW,
 A_IRETQ,
-A_JCXZ,
 A_JECXZ,
 A_JRCXZ,
 A_JMP,
 A_LAHF,
 A_LAR,
 A_LCALL,
-A_LDS,
 A_LEA,
 A_LEAVE,
-A_LES,
 A_LFS,
 A_LGDT,
 A_LGS,
@@ -266,11 +254,7 @@ A_PMVLZB,
 A_PMVNZB,
 A_PMVZB,
 A_POP,
-A_POPA,
-A_POPAD,
-A_POPAW,
 A_POPF,
-A_POPFD,
 A_POPFW,
 A_POPFQ,
 A_POR,
@@ -300,11 +284,7 @@ A_PUNPCKLBW,
 A_PUNPCKLDQ,
 A_PUNPCKLWD,
 A_PUSH,
-A_PUSHA,
-A_PUSHAD,
-A_PUSHAW,
 A_PUSHF,
-A_PUSHFD,
 A_PUSHFW,
 A_PUSHFQ,
 A_PXOR,
@@ -329,7 +309,6 @@ A_RSLDT,
 A_RSM,
 A_SAHF,
 A_SAL,
-A_SALC,
 A_SAR,
 A_SBB,
 A_SCASB,
@@ -946,6 +925,7 @@ A_VZEROALL,
 A_VZEROUPPER,
 A_ANDN,
 A_BEXTR,
+A_TZCNT,
 A_RORX,
 A_SARX,
 A_SHLX,
@@ -969,5 +949,74 @@ A_VPSLLVD,
 A_VPSLLVQ,
 A_VPSRAVD,
 A_VPSRLVD,
-A_VPSRLVQ
+A_VPSRLVQ,
+A_VGATHERDPD,
+A_VGATHERDPS,
+A_VGATHERQPD,
+A_VGATHERQPS,
+A_VPGATHERDD,
+A_VPGATHERDQ,
+A_VPGATHERQD,
+A_VPGATHERQQ,
+A_VFMADD132PD,
+A_VFMADD213PD,
+A_VFMADD231PD,
+A_VFMADDPD,
+A_VFMADD132PS,
+A_VFMADD213PS,
+A_VFMADD231PS,
+A_VFMADD132SD,
+A_VFMADD213SD,
+A_VFMADD231SD,
+A_VFMADD132SS,
+A_VFMADD213SS,
+A_VFMADD231SS,
+A_VFMADDSUB132PD,
+A_VFMADDSUB213PD,
+A_VFMADDSUB231PD,
+A_VFMADDSUB132PS,
+A_VFMADDSUB213PS,
+A_VFMADDSUB231PS,
+A_VFMSUBADD132PD,
+A_VFMSUBADD213PD,
+A_VFMSUBADD231PD,
+A_VFMSUBADD132PS,
+A_VFMSUBADD213PS,
+A_VFMSUBADD231PS,
+A_VFMSUB132PD,
+A_VFMSUB213PD,
+A_VFMSUB231PD,
+A_VFMSUB132PS,
+A_VFMSUB213PS,
+A_VFMSUB231PS,
+A_VFMSUB132SD,
+A_VFMSUB213SD,
+A_VFMSUB231SD,
+A_VFMSUB132SS,
+A_VFMSUB213SS,
+A_VFMSUB231SS,
+A_VFNMADD132PD,
+A_VFNMADD213PD,
+A_VFNMADD231PD,
+A_VFNMADD132PS,
+A_VFNMADD213PS,
+A_VFNMADD231PS,
+A_VFNMADD132SD,
+A_VFNMADD213SD,
+A_VFNMADD231SD,
+A_VFNMADD132SS,
+A_VFNMADD213SS,
+A_VFNMADD231SS,
+A_VFNMSUB132PD,
+A_VFNMSUB213PD,
+A_VFNMSUB231PD,
+A_VFNMSUB132PS,
+A_VFNMSUB213PS,
+A_VFNMSUB231PS,
+A_VFNMSUB132SD,
+A_VFNMSUB213SD,
+A_VFNMSUB231SD,
+A_VFNMSUB132SS,
+A_VFNMSUB213SS,
+A_VFNMSUB231SS
 );

+ 140 - 91
compiler/x86_64/x8664pro.inc

@@ -1,15 +1,9 @@
 { don't edit, this file is generated from x86ins.dat }
 (
 (Ch: (Ch_None, Ch_None, Ch_None)),
-(Ch: (Ch_MEAX, Ch_WFlags, Ch_None)),
-(Ch: (Ch_MEAX, Ch_WFlags, Ch_None)),
-(Ch: (Ch_MEAX, Ch_WFlags, Ch_None)),
-(Ch: (Ch_MEAX, Ch_WFlags, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_RWFlags)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_WFlags)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_WFlags)),
-(Ch: (Ch_WFlags, Ch_None, Ch_None)),
-(Ch: (Ch_Rop1, Ch_None, Ch_None)),
 (Ch: (Ch_Wop2, Ch_WFlags, Ch_Rop1)),
 (Ch: (Ch_Wop2, Ch_WFlags, Ch_Rop1)),
 (Ch: (Ch_MOp1, Ch_None, Ch_None)),
@@ -35,8 +29,6 @@
 (Ch: (Ch_All, Ch_None, Ch_none)),
 (Ch: (Ch_MEAX, Ch_WEDX, Ch_None)),
 (Ch: (Ch_MEAX, Ch_None, Ch_None)),
-(Ch: (Ch_MEAX, Ch_None, Ch_None)),
-(Ch: (Ch_MEAX, Ch_None, Ch_None)),
 (Ch: (Ch_Mop1, Ch_WFlags, Ch_None)),
 (Ch: (Ch_RWEAX, Ch_WEDX, Ch_WFlags)),
 (Ch: (Ch_FPU, Ch_None, Ch_None)),
@@ -161,8 +153,6 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_RECX, Ch_None, Ch_None)),
 (Ch: (Ch_RECX, Ch_None, Ch_None)),
 (Ch: (Ch_RECX, Ch_None, Ch_None)),
 (Ch: (Ch_ROp1, Ch_None, Ch_None)),
@@ -170,10 +160,8 @@
 (Ch: (Ch_Wop2, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
-(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_RWESP, Ch_WEBP, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
-(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_None, Ch_None, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_None, Ch_None, Ch_None)),
@@ -224,8 +212,8 @@
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -266,10 +254,6 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop1, Ch_RWESP, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_RWESP, Ch_WFlags, Ch_None)),
 (Ch: (Ch_RWESP, Ch_WFlags, Ch_None)),
 (Ch: (Ch_RWESP, Ch_WFLAGS, Ch_None)),
 (Ch: (Ch_RWESP, Ch_WFlags, Ch_None)),
@@ -300,10 +284,6 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Rop1, Ch_RWESP, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_RWESP, Ch_RFlags, Ch_None)),
 (Ch: (Ch_RWESP, Ch_RFlags, Ch_None)),
 (Ch: (Ch_RWESP, Ch_RFLAGS, Ch_None)),
 (Ch: (Ch_RWESP, Ch_RFlags, Ch_None)),
@@ -329,7 +309,6 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_WFlags, Ch_REAX, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_RWFlags)),
-(Ch: (Ch_WEAX, Ch_RFLAGS, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_WFlags)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_RWFlags)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -389,7 +368,7 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_ROp1, Ch_WOp2, Ch_RFLAGS)),
+(Ch: (Ch_ROp1, Ch_RWOp2, Ch_RFLAGS)),
 (Ch: (Ch_RFLAGS, Ch_None, Ch_None)),
 (Ch: (Ch_RFLAGS, Ch_WOp1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
@@ -489,8 +468,8 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -570,7 +549,7 @@
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -730,6 +709,10 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -753,6 +736,12 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -761,28 +750,20 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
@@ -796,7 +777,15 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -859,6 +848,14 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -867,6 +864,24 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -876,15 +891,26 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -893,8 +919,17 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Wop2, Ch_WFlags, Ch_Rop1)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -923,51 +958,65 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_None)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None))
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1))
 );

+ 807 - 9
compiler/x86_64/x8664tab.inc

@@ -2544,17 +2544,10 @@
   (
     opcode  : A_LEA;
     ops     : 2;
-    optypes : (ot_reg32 or ot_bits64,ot_memory,ot_none,ot_none);
-    code    : #208#1#141#72;
+    optypes : (ot_reg16 or ot_bits32 or ot_bits64,ot_memory,ot_none,ot_none);
+    code    : #193#208#1#141#72;
     flags   : if_8086
   ),
-  (
-    opcode  : A_LEA;
-    ops     : 2;
-    optypes : (ot_reg32 or ot_bits64,ot_immediate,ot_none,ot_none);
-    code    : #208#1#141#72;
-    flags   : if_8086 or if_sd
-  ),
   (
     opcode  : A_LEAVE;
     ops     : 0;
@@ -12579,6 +12572,13 @@
     code    : #242#243#249#1#247#62#72;
     flags   : if_bmi1 or if_x86_64
   ),
+  (
+    opcode  : A_TZCNT;
+    ops     : 2;
+    optypes : (ot_reg16 or ot_bits32 or ot_bits64,ot_rm_gpr,ot_none,ot_none);
+    code    : #208#219#2#15#188#72;
+    flags   : if_bmi1 or if_sm
+  ),
   (
     opcode  : A_RORX;
     ops     : 3;
@@ -12942,5 +12942,803 @@
     optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
     code    : #241#242#243#249#1#69#61#80;
     flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VFMADD132PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#152#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD132PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#152#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD213PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#168#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD213PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#168#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD231PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#184#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD231PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#184#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDPD;
+    ops     : 4;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_xmmreg);
+    code    : #241#242#250#1#105#61#80#247;
+    flags   : if_fma4
+  ),
+  (
+    opcode  : A_VFMADDPD;
+    ops     : 4;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmreg,ot_xmmrm);
+    code    : #241#242#250#243#1#105#61#88#246;
+    flags   : if_fma4
+  ),
+  (
+    opcode  : A_VFMADD132PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#152#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD132PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#152#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD213PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#168#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD213PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#168#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD231PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#184#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD231PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#184#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD132SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#153#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD213SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#169#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD231SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#185#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD132SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#153#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD213SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#169#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADD231SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#185#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB132PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#150#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB132PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#150#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB213PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#166#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB213PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#166#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB231PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#182#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB231PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#182#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB132PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#150#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB132PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#150#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB213PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#166#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB213PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#166#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB231PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#182#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMADDSUB231PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#182#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD132PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#151#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD132PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#151#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD213PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#167#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD213PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#167#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD231PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#183#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD231PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#183#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD132PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#151#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD132PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#151#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD213PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#167#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD213PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#167#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD231PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#183#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUBADD231PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#183#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB132PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#154#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB132PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#154#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB213PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#170#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB213PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#170#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB231PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#186#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB231PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#186#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB132PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#154#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB132PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#154#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB213PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#170#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB213PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#170#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB231PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#186#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB231PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#186#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB132SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#155#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB213SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#171#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB231SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#187#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB132SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#155#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB213SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#171#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFMSUB231SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#187#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD132PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#156#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD132PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#156#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD213PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#172#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD213PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#172#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD231PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#188#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD231PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#188#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD132PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#156#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD132PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#156#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD213PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#172#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD213PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#172#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD231PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#188#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD231PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#188#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD132SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#157#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD213SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#173#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD231SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#189#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD132SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#157#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD213SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#173#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMADD231SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#189#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB132PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#158#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB132PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#158#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB213PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#174#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB213PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#174#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB231PD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#190#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB231PD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#243#1#190#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB132PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#158#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB132PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#158#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB213PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#174#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB213PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#174#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB231PS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#190#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB231PS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #241#242#244#249#1#190#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB132SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#159#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB213SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#175#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB231SD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#243#1#191#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB132SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#159#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB213SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#175#61#80;
+    flags   : if_fma
+  ),
+  (
+    opcode  : A_VFNMSUB231SS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #241#242#249#1#191#61#80;
+    flags   : if_fma
   )
 );