Browse Source

* regenerated

git-svn-id: trunk@45778 -
florian 5 years ago
parent
commit
ac5407268c
6 changed files with 653 additions and 361 deletions
  1. 109 54
      compiler/x86/x86mmfirst.inc
  2. 278 102
      compiler/x86/x86mmsecond.inc
  3. 114 59
      compiler/x86/x86mmtype.inc
  4. 49 49
      rtl/i386/cpummprocs.inc
  5. 46 47
      rtl/x86_64/cpumminnr.inc
  6. 57 50
      rtl/x86_64/cpummprocs.inc

+ 109 - 54
compiler/x86/x86mmfirst.inc

@@ -69,14 +69,7 @@ in_x86_movss_from_val
 in_x86_movlps
 ,in_x86_movhps
 ,in_x86_cvtpi2ps_from_mem
-,in_x86_addsd_from_mem
-,in_x86_divsd_from_mem
 ,in_x86_maxsd_from_mem
-,in_x86_minsd_from_mem
-,in_x86_mulsd_from_mem
-,in_x86_subsd_from_mem
-,in_x86_comisd_from_mem
-,in_x86_ucomisd_from_mem
 ,in_x86_cvtdq2pd_from_mem
 ,in_x86_cvtpi2pd_from_mem
 ,in_x86_cvtps2pd_from_mem
@@ -96,15 +89,6 @@ in_x86_movlps_to_mem
   end;
 in_x86_movlhps
 ,in_x86_movhlps
-,in_x86_addss
-,in_x86_subss
-,in_x86_mulss
-,in_x86_divss
-,in_x86_rcpss
-,in_x86_sqrtss
-,in_x86_maxss
-,in_x86_minss
-,in_x86_rsqrtss
 ,in_x86_addps
 ,in_x86_subps
 ,in_x86_mulps
@@ -121,23 +105,15 @@ in_x86_movlhps
 ,in_x86_unpckhps
 ,in_x86_unpcklps
 ,in_x86_addpd
-,in_x86_addsd
 ,in_x86_divpd
-,in_x86_divsd
 ,in_x86_maxpd
-,in_x86_maxsd
 ,in_x86_minpd
-,in_x86_minsd
 ,in_x86_mulpd
-,in_x86_mulsd
 ,in_x86_subpd
-,in_x86_subsd
 ,in_x86_andpd
 ,in_x86_andnpd
 ,in_x86_orpd
 ,in_x86_xorpd
-,in_x86_comisd
-,in_x86_ucomisd
 ,in_x86_unpckhpd
 ,in_x86_unpcklpd
 ,in_x86_cvtdq2pd
@@ -147,7 +123,6 @@ in_x86_movlhps
 ,in_x86_cvtps2dq
 ,in_x86_cvtps2pd
 ,in_x86_cvtsd2ss
-,in_x86_cvtss2sd
 ,in_x86_cvttpd2dq
 ,in_x86_cvttps2dq
 ,in_x86_packssdw
@@ -245,6 +220,20 @@ in_x86_movlhps
     expectloc:=LOC_MMREGISTER;
     result:=nil;
   end;
+in_x86_addss
+,in_x86_subss
+,in_x86_mulss
+,in_x86_divss
+,in_x86_rcpss
+,in_x86_sqrtss
+,in_x86_maxss
+,in_x86_minss
+,in_x86_rsqrtss
+: //var r0:f32;r1:f32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
 in_x86_addss_from_mem
 ,in_x86_subss_from_mem
 ,in_x86_mulss_from_mem
@@ -255,9 +244,7 @@ in_x86_addss_from_mem
 ,in_x86_minss_from_mem
 ,in_x86_rsqrtss_from_mem
 ,in_x86_cvtsi2ss_from_mem
-,in_x86_cvtsi2sd_from_mem
-,in_x86_cvtss2sd_from_mem
-: //var r0:xmm;r1:ptr32;
+: //var r0:f32;r1:ptr32;
   begin
     expectloc:=LOC_MMREGISTER;
     result:=nil;
@@ -391,10 +378,20 @@ in_x86_addps_from_mem
     result:=nil;
   end;
 in_x86_cmpss
-,in_x86_cmpps
+: //var r0:f32;r1:f32;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_cmpss_from_mem
+: //var r0:f32;r1:ptr32;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_cmpps
 ,in_x86_shufps
 ,in_x86_cmppd
-,in_x86_cmpsd
 ,in_x86_shufpd
 ,in_x86_palignr
 ,in_x86_dpps
@@ -413,14 +410,6 @@ in_x86_cmpss
     expectloc:=LOC_MMREGISTER;
     result:=nil;
   end;
-in_x86_cmpss_from_mem
-,in_x86_insertps_from_mem
-,in_x86_pinsrd_from_mem
-: //var r0:xmm;r1:ptr32;imm:i32;
-  begin
-    expectloc:=LOC_MMREGISTER;
-    result:=nil;
-  end;
 in_x86_cmpps_from_mem
 ,in_x86_shufps_from_mem
 ,in_x86_cmppd_from_mem
@@ -442,14 +431,14 @@ in_x86_cmpps_from_mem
     result:=nil;
   end;
 in_x86_cvtsi2ss
-: //var r0:xmm;r1:reg;
+: //var r0:f32;r1:reg;
   begin
     expectloc:=LOC_MMREGISTER;
     result:=nil;
   end;
 in_x86_cvtss2si
 ,in_x86_cvttss2si
-: //out r0:reg;r1:xmm;
+: //out r0:reg;r1:f32;
   begin
     expectloc:=LOC_REGISTER;
     result:=nil;
@@ -558,8 +547,38 @@ in_x86_movsd_from_val
     expectloc:=LOC_MMREGISTER;
     result:=nil;
   end;
+in_x86_addsd
+,in_x86_divsd
+,in_x86_minsd
+,in_x86_subsd
+,in_x86_comisd
+,in_x86_ucomisd
+,in_x86_cvtss2sd
+: //var r0:f64;r1:f64;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_addsd_from_mem
+,in_x86_divsd_from_mem
+,in_x86_minsd_from_mem
+,in_x86_mulsd_from_mem
+,in_x86_subsd_from_mem
+,in_x86_comisd_from_mem
+,in_x86_ucomisd_from_mem
+: //var r0:f64;r1:ptr64;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_maxsd
+,in_x86_mulsd
+: //var r0:f64;r1:xmm;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
 in_x86_sqrtpd
-,in_x86_sqrtsd
 ,in_x86_movddup
 ,in_x86_movsldup
 ,in_x86_movshdup
@@ -583,22 +602,26 @@ in_x86_sqrtpd
     expectloc:=LOC_MMREGISTER;
     result:=nil;
   end;
+in_x86_sqrtsd
+: //out r0:f64;r1:f64;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
 in_x86_sqrtsd_from_mem
-,in_x86_movq_from_mem
-,in_x86_movddup_from_mem
-,in_x86_pmovsxbw_from_mem
-,in_x86_pmovzxbw_from_mem
-,in_x86_pmovsxwd_from_mem
-,in_x86_pmovzxwd_from_mem
-,in_x86_pmovsxdq_from_mem
-,in_x86_pmovzxdq_from_mem
-: //out r0:xmm;r1:ptr64;
+: //out r0:f64;r1:ptr64;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_cmpsd
+: //var r0:f64;r1:f64;imm:i32;
   begin
     expectloc:=LOC_MMREGISTER;
     result:=nil;
   end;
 in_x86_cmpsd_from_mem
-: //var r0:xmm;r1:ptr64;imm:i32;
+: //var r0:f64;r1:ptr64;imm:i32;
   begin
     expectloc:=LOC_MMREGISTER;
     result:=nil;
@@ -632,7 +655,14 @@ in_x86_cvtsd2si_from_mem
     result:=nil;
   end;
 in_x86_cvtsi2sd
-: //var r0:xmm;r1:r32;
+: //var r0:f64;r1:r32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_cvtsi2sd_from_mem
+,in_x86_cvtss2sd_from_mem
+: //var r0:f64;r1:ptr32;
   begin
     expectloc:=LOC_MMREGISTER;
     result:=nil;
@@ -643,6 +673,19 @@ in_x86_movd_from_reg
     expectloc:=LOC_MMREGISTER;
     result:=nil;
   end;
+in_x86_movq_from_mem
+,in_x86_movddup_from_mem
+,in_x86_pmovsxbw_from_mem
+,in_x86_pmovzxbw_from_mem
+,in_x86_pmovsxwd_from_mem
+,in_x86_pmovzxwd_from_mem
+,in_x86_pmovsxdq_from_mem
+,in_x86_pmovzxdq_from_mem
+: //out r0:xmm;r1:ptr64;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
 in_x86_pmovmskb
 : //var r0:r32;r1:xmm;
   begin
@@ -702,7 +745,6 @@ in_x86_pshufhw
 ,in_x86_roundps
 ,in_x86_roundss
 ,in_x86_roundpd
-,in_x86_roundsd
 : //out r0:xmm;r1:xmm;imm:i32;
   begin
     expectloc:=LOC_MMREGISTER;
@@ -740,8 +782,21 @@ in_x86_roundss_from_mem
     expectloc:=LOC_MMREGISTER;
     result:=nil;
   end;
+in_x86_roundsd
+: //out r0:f64;r1:f64;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
 in_x86_roundsd_from_mem
-: //out r0:xmm;r1:ptr64;imm:i32;
+: //out r0:f64;r1:ptr64;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_insertps_from_mem
+,in_x86_pinsrd_from_mem
+: //var r0:xmm;r1:ptr32;imm:i32;
   begin
     expectloc:=LOC_MMREGISTER;
     result:=nil;

+ 278 - 102
compiler/x86/x86mmsecond.inc

@@ -171,14 +171,7 @@ in_x86_movss_from_val
 in_x86_movlps
 ,in_x86_movhps
 ,in_x86_cvtpi2ps_from_mem
-,in_x86_addsd_from_mem
-,in_x86_divsd_from_mem
 ,in_x86_maxsd_from_mem
-,in_x86_minsd_from_mem
-,in_x86_mulsd_from_mem
-,in_x86_subsd_from_mem
-,in_x86_comisd_from_mem
-,in_x86_ucomisd_from_mem
 ,in_x86_cvtdq2pd_from_mem
 ,in_x86_cvtpi2pd_from_mem
 ,in_x86_cvtps2pd_from_mem
@@ -190,14 +183,7 @@ in_x86_movlps
       in_x86_cvtps2pd_from_mem: begin op:=A_cvtps2pd end;
       in_x86_cvtpi2pd_from_mem: begin op:=A_cvtpi2pd end;
       in_x86_cvtdq2pd_from_mem: begin op:=A_cvtdq2pd end;
-      in_x86_ucomisd_from_mem: begin op:=A_ucomisd end;
-      in_x86_comisd_from_mem: begin op:=A_comisd end;
-      in_x86_subsd_from_mem: begin op:=A_subsd end;
-      in_x86_mulsd_from_mem: begin op:=A_mulsd end;
-      in_x86_minsd_from_mem: begin op:=A_minsd end;
       in_x86_maxsd_from_mem: begin op:=A_maxsd end;
-      in_x86_divsd_from_mem: begin op:=A_divsd end;
-      in_x86_addsd_from_mem: begin op:=A_addsd end;
       in_x86_cvtpi2ps_from_mem: begin op:=A_cvtpi2ps end;
       in_x86_movhps: begin op:=A_movhps end;
       in_x86_movlps: begin op:=A_movlps; end;
@@ -237,15 +223,6 @@ in_x86_movlps_to_mem
   end;
 in_x86_movlhps
 ,in_x86_movhlps
-,in_x86_addss
-,in_x86_subss
-,in_x86_mulss
-,in_x86_divss
-,in_x86_rcpss
-,in_x86_sqrtss
-,in_x86_maxss
-,in_x86_minss
-,in_x86_rsqrtss
 ,in_x86_addps
 ,in_x86_subps
 ,in_x86_mulps
@@ -262,23 +239,15 @@ in_x86_movlhps
 ,in_x86_unpckhps
 ,in_x86_unpcklps
 ,in_x86_addpd
-,in_x86_addsd
 ,in_x86_divpd
-,in_x86_divsd
 ,in_x86_maxpd
-,in_x86_maxsd
 ,in_x86_minpd
-,in_x86_minsd
 ,in_x86_mulpd
-,in_x86_mulsd
 ,in_x86_subpd
-,in_x86_subsd
 ,in_x86_andpd
 ,in_x86_andnpd
 ,in_x86_orpd
 ,in_x86_xorpd
-,in_x86_comisd
-,in_x86_ucomisd
 ,in_x86_unpckhpd
 ,in_x86_unpcklpd
 ,in_x86_cvtdq2pd
@@ -288,7 +257,6 @@ in_x86_movlhps
 ,in_x86_cvtps2dq
 ,in_x86_cvtps2pd
 ,in_x86_cvtsd2ss
-,in_x86_cvtss2sd
 ,in_x86_cvttpd2dq
 ,in_x86_cvttps2dq
 ,in_x86_packssdw
@@ -476,7 +444,6 @@ in_x86_movlhps
       in_x86_packssdw: begin op:=A_packssdw end;
       in_x86_cvttps2dq: begin op:=A_cvttps2dq end;
       in_x86_cvttpd2dq: begin op:=A_cvttpd2dq end;
-      in_x86_cvtss2sd: begin op:=A_cvtss2sd end;
       in_x86_cvtsd2ss: begin op:=A_cvtsd2ss end;
       in_x86_cvtps2pd: begin op:=A_cvtps2pd end;
       in_x86_cvtps2dq: begin op:=A_cvtps2dq end;
@@ -486,23 +453,15 @@ in_x86_movlhps
       in_x86_cvtdq2pd: begin op:=A_cvtdq2pd end;
       in_x86_unpcklpd: begin op:=A_unpcklpd end;
       in_x86_unpckhpd: begin op:=A_unpckhpd end;
-      in_x86_ucomisd: begin op:=A_ucomisd end;
-      in_x86_comisd: begin op:=A_comisd end;
       in_x86_xorpd: begin op:=A_xorpd end;
       in_x86_orpd: begin op:=A_orpd end;
       in_x86_andnpd: begin op:=A_andnpd end;
       in_x86_andpd: begin op:=A_andpd end;
-      in_x86_subsd: begin op:=A_subsd end;
       in_x86_subpd: begin op:=A_subpd end;
-      in_x86_mulsd: begin op:=A_mulsd end;
       in_x86_mulpd: begin op:=A_mulpd end;
-      in_x86_minsd: begin op:=A_minsd end;
       in_x86_minpd: begin op:=A_minpd end;
-      in_x86_maxsd: begin op:=A_maxsd end;
       in_x86_maxpd: begin op:=A_maxpd end;
-      in_x86_divsd: begin op:=A_divsd end;
       in_x86_divpd: begin op:=A_divpd end;
-      in_x86_addsd: begin op:=A_addsd end;
       in_x86_addpd: begin op:=A_addpd end;
       in_x86_unpcklps: begin op:=A_unpcklps end;
       in_x86_unpckhps: begin op:=A_unpckhps end;
@@ -519,6 +478,33 @@ in_x86_movlhps
       in_x86_mulps: begin op:=A_mulps end;
       in_x86_subps: begin op:=A_subps end;
       in_x86_addps: begin op:=A_addps end;
+      in_x86_movhlps: begin op:=A_movhlps end;
+      in_x86_movlhps: begin op:=A_movlhps; end;
+      else
+        Internalerror(2020010201);
+    end;
+
+    GetParameters(2);
+
+    for i := 1 to 2 do secondpass(paraarray[i]);
+
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[1].location, false);
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[2].location, true);
+    location:=paraarray[1].location;
+    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,paraarray[2].location.register,paraarray[1].location.register));
+  end;
+in_x86_addss
+,in_x86_subss
+,in_x86_mulss
+,in_x86_divss
+,in_x86_rcpss
+,in_x86_sqrtss
+,in_x86_maxss
+,in_x86_minss
+,in_x86_rsqrtss
+: //var r0:f32;r1:f32;
+  begin
+    case inlinenumber of
       in_x86_rsqrtss: begin op:=A_rsqrtss end;
       in_x86_minss: begin op:=A_minss end;
       in_x86_maxss: begin op:=A_maxss end;
@@ -527,9 +513,7 @@ in_x86_movlhps
       in_x86_divss: begin op:=A_divss end;
       in_x86_mulss: begin op:=A_mulss end;
       in_x86_subss: begin op:=A_subss end;
-      in_x86_addss: begin op:=A_addss end;
-      in_x86_movhlps: begin op:=A_movhlps end;
-      in_x86_movlhps: begin op:=A_movlhps; end;
+      in_x86_addss: begin op:=A_addss; end;
       else
         Internalerror(2020010201);
     end;
@@ -553,13 +537,9 @@ in_x86_addss_from_mem
 ,in_x86_minss_from_mem
 ,in_x86_rsqrtss_from_mem
 ,in_x86_cvtsi2ss_from_mem
-,in_x86_cvtsi2sd_from_mem
-,in_x86_cvtss2sd_from_mem
-: //var r0:xmm;r1:ptr32;
+: //var r0:f32;r1:ptr32;
   begin
     case inlinenumber of
-      in_x86_cvtss2sd_from_mem: begin op:=A_cvtss2sd end;
-      in_x86_cvtsi2sd_from_mem: begin op:=A_cvtsi2sd end;
       in_x86_cvtsi2ss_from_mem: begin op:=A_cvtsi2ss end;
       in_x86_rsqrtss_from_mem: begin op:=A_rsqrtss end;
       in_x86_minss_from_mem: begin op:=A_minss end;
@@ -846,10 +826,44 @@ in_x86_addps_from_mem
     current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,paraarray[2].location.reference,paraarray[1].location.register));
   end;
 in_x86_cmpss
-,in_x86_cmpps
+: //var r0:f32;r1:f32;imm:i32;
+  begin
+    case inlinenumber of
+      in_x86_cmpss: begin op:=A_cmpss; end;
+      else
+        Internalerror(2020010201);
+    end;
+
+    GetParameters(3);
+
+    for i := 1 to 3 do secondpass(paraarray[i]);
+
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[1].location, false);
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[2].location, true);
+    location:=paraarray[1].location;
+    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(op,S_NO,GetConstInt(paraarray[3]),paraarray[2].location.register,paraarray[1].location.register));
+  end;
+in_x86_cmpss_from_mem
+: //var r0:f32;r1:ptr32;imm:i32;
+  begin
+    case inlinenumber of
+      in_x86_cmpss_from_mem: begin op:=A_cmpss; end;
+      else
+        Internalerror(2020010201);
+    end;
+
+    GetParameters(3);
+
+    for i := 1 to 3 do secondpass(paraarray[i]);
+
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[1].location, false);
+    location_make_ref(paraarray[2].location);
+    location:=paraarray[1].location;
+    current_asmdata.CurrAsmList.concat(taicpu.op_const_ref_reg(op,S_NO,GetConstInt(paraarray[3]),paraarray[2].location.reference,paraarray[1].location.register));
+  end;
+in_x86_cmpps
 ,in_x86_shufps
 ,in_x86_cmppd
-,in_x86_cmpsd
 ,in_x86_shufpd
 ,in_x86_palignr
 ,in_x86_dpps
@@ -879,11 +893,9 @@ in_x86_cmpss
       in_x86_dpps: begin op:=A_dpps end;
       in_x86_palignr: begin op:=A_palignr end;
       in_x86_shufpd: begin op:=A_shufpd end;
-      in_x86_cmpsd: begin op:=A_cmpsd end;
       in_x86_cmppd: begin op:=A_cmppd end;
       in_x86_shufps: begin op:=A_shufps end;
-      in_x86_cmpps: begin op:=A_cmpps end;
-      in_x86_cmpss: begin op:=A_cmpss; end;
+      in_x86_cmpps: begin op:=A_cmpps; end;
       else
         Internalerror(2020010201);
     end;
@@ -897,28 +909,6 @@ in_x86_cmpss
     location:=paraarray[1].location;
     current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(op,S_NO,GetConstInt(paraarray[3]),paraarray[2].location.register,paraarray[1].location.register));
   end;
-in_x86_cmpss_from_mem
-,in_x86_insertps_from_mem
-,in_x86_pinsrd_from_mem
-: //var r0:xmm;r1:ptr32;imm:i32;
-  begin
-    case inlinenumber of
-      in_x86_pinsrd_from_mem: begin op:=A_pinsrd end;
-      in_x86_insertps_from_mem: begin op:=A_insertps end;
-      in_x86_cmpss_from_mem: begin op:=A_cmpss; end;
-      else
-        Internalerror(2020010201);
-    end;
-
-    GetParameters(3);
-
-    for i := 1 to 3 do secondpass(paraarray[i]);
-
-    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[1].location, false);
-    location_make_ref(paraarray[2].location);
-    location:=paraarray[1].location;
-    current_asmdata.CurrAsmList.concat(taicpu.op_const_ref_reg(op,S_NO,GetConstInt(paraarray[3]),paraarray[2].location.reference,paraarray[1].location.register));
-  end;
 in_x86_cmpps_from_mem
 ,in_x86_shufps_from_mem
 ,in_x86_cmppd_from_mem
@@ -966,7 +956,7 @@ in_x86_cmpps_from_mem
     current_asmdata.CurrAsmList.concat(taicpu.op_const_ref_reg(op,S_NO,GetConstInt(paraarray[3]),paraarray[2].location.reference,paraarray[1].location.register));
   end;
 in_x86_cvtsi2ss
-: //var r0:xmm;r1:reg;
+: //var r0:f32;r1:reg;
   begin
     case inlinenumber of
       in_x86_cvtsi2ss: begin op:=A_cvtsi2ss; end;
@@ -985,7 +975,7 @@ in_x86_cvtsi2ss
   end;
 in_x86_cvtss2si
 ,in_x86_cvttss2si
-: //out r0:reg;r1:xmm;
+: //out r0:reg;r1:f32;
   begin
     case inlinenumber of
       in_x86_cvttss2si: begin op:=A_cvttss2si end;
@@ -1295,8 +1285,87 @@ in_x86_movsd_from_val
     location.register:=cg.getmmregister(current_asmdata.CurrAsmList, OS_M128);
     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,paraarray[1].location.register,location.register));
   end;
+in_x86_addsd
+,in_x86_divsd
+,in_x86_minsd
+,in_x86_subsd
+,in_x86_comisd
+,in_x86_ucomisd
+,in_x86_cvtss2sd
+: //var r0:f64;r1:f64;
+  begin
+    case inlinenumber of
+      in_x86_cvtss2sd: begin op:=A_cvtss2sd end;
+      in_x86_ucomisd: begin op:=A_ucomisd end;
+      in_x86_comisd: begin op:=A_comisd end;
+      in_x86_subsd: begin op:=A_subsd end;
+      in_x86_minsd: begin op:=A_minsd end;
+      in_x86_divsd: begin op:=A_divsd end;
+      in_x86_addsd: begin op:=A_addsd; end;
+      else
+        Internalerror(2020010201);
+    end;
+
+    GetParameters(2);
+
+    for i := 1 to 2 do secondpass(paraarray[i]);
+
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[1].location, false);
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[2].location, true);
+    location:=paraarray[1].location;
+    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,paraarray[2].location.register,paraarray[1].location.register));
+  end;
+in_x86_addsd_from_mem
+,in_x86_divsd_from_mem
+,in_x86_minsd_from_mem
+,in_x86_mulsd_from_mem
+,in_x86_subsd_from_mem
+,in_x86_comisd_from_mem
+,in_x86_ucomisd_from_mem
+: //var r0:f64;r1:ptr64;
+  begin
+    case inlinenumber of
+      in_x86_ucomisd_from_mem: begin op:=A_ucomisd end;
+      in_x86_comisd_from_mem: begin op:=A_comisd end;
+      in_x86_subsd_from_mem: begin op:=A_subsd end;
+      in_x86_mulsd_from_mem: begin op:=A_mulsd end;
+      in_x86_minsd_from_mem: begin op:=A_minsd end;
+      in_x86_divsd_from_mem: begin op:=A_divsd end;
+      in_x86_addsd_from_mem: begin op:=A_addsd; end;
+      else
+        Internalerror(2020010201);
+    end;
+
+    GetParameters(2);
+
+    for i := 1 to 2 do secondpass(paraarray[i]);
+
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[1].location, false);
+    location_make_ref(paraarray[2].location);
+    location:=paraarray[1].location;
+    current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,paraarray[2].location.reference,paraarray[1].location.register));
+  end;
+in_x86_maxsd
+,in_x86_mulsd
+: //var r0:f64;r1:xmm;
+  begin
+    case inlinenumber of
+      in_x86_mulsd: begin op:=A_mulsd end;
+      in_x86_maxsd: begin op:=A_maxsd; end;
+      else
+        Internalerror(2020010201);
+    end;
+
+    GetParameters(2);
+
+    for i := 1 to 2 do secondpass(paraarray[i]);
+
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[1].location, false);
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[2].location, true);
+    location:=paraarray[1].location;
+    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,paraarray[2].location.register,paraarray[1].location.register));
+  end;
 in_x86_sqrtpd
-,in_x86_sqrtsd
 ,in_x86_movddup
 ,in_x86_movsldup
 ,in_x86_movshdup
@@ -1336,7 +1405,6 @@ in_x86_sqrtpd
       in_x86_movshdup: begin op:=A_movshdup end;
       in_x86_movsldup: begin op:=A_movsldup end;
       in_x86_movddup: begin op:=A_movddup end;
-      in_x86_sqrtsd: begin op:=A_sqrtsd end;
       in_x86_sqrtpd: begin op:=A_sqrtpd; end;
       else
         Internalerror(2020010201);
@@ -1346,31 +1414,33 @@ in_x86_sqrtpd
 
     for i := 1 to 1 do secondpass(paraarray[i]);
 
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[1].location, true);
+    location_reset(location,LOC_MMREGISTER,OS_M128);
+    location.register:=cg.getmmregister(current_asmdata.CurrAsmList, OS_M128);
+    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,paraarray[1].location.register,location.register));
+  end;
+in_x86_sqrtsd
+: //out r0:f64;r1:f64;
+  begin
+    case inlinenumber of
+      in_x86_sqrtsd: begin op:=A_sqrtsd; end;
+      else
+        Internalerror(2020010201);
+    end;
+
+    GetParameters(1);
+
+    for i := 1 to 1 do secondpass(paraarray[i]);
+
     location_force_mmreg(current_asmdata.CurrAsmList, paraarray[1].location, true);
     location_reset(location,LOC_MMREGISTER,OS_M128);
     location.register:=cg.getmmregister(current_asmdata.CurrAsmList, OS_M128);
     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,paraarray[1].location.register,location.register));
   end;
 in_x86_sqrtsd_from_mem
-,in_x86_movq_from_mem
-,in_x86_movddup_from_mem
-,in_x86_pmovsxbw_from_mem
-,in_x86_pmovzxbw_from_mem
-,in_x86_pmovsxwd_from_mem
-,in_x86_pmovzxwd_from_mem
-,in_x86_pmovsxdq_from_mem
-,in_x86_pmovzxdq_from_mem
-: //out r0:xmm;r1:ptr64;
+: //out r0:f64;r1:ptr64;
   begin
     case inlinenumber of
-      in_x86_pmovzxdq_from_mem: begin op:=A_pmovzxdq end;
-      in_x86_pmovsxdq_from_mem: begin op:=A_pmovsxdq end;
-      in_x86_pmovzxwd_from_mem: begin op:=A_pmovzxwd end;
-      in_x86_pmovsxwd_from_mem: begin op:=A_pmovsxwd end;
-      in_x86_pmovzxbw_from_mem: begin op:=A_pmovzxbw end;
-      in_x86_pmovsxbw_from_mem: begin op:=A_pmovsxbw end;
-      in_x86_movddup_from_mem: begin op:=A_movddup end;
-      in_x86_movq_from_mem: begin op:=A_movq end;
       in_x86_sqrtsd_from_mem: begin op:=A_sqrtsd; end;
       else
         Internalerror(2020010201);
@@ -1385,8 +1455,26 @@ in_x86_sqrtsd_from_mem
     location.register:=cg.getmmregister(current_asmdata.CurrAsmList, OS_M128);
     current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,paraarray[1].location.reference,location.register));
   end;
+in_x86_cmpsd
+: //var r0:f64;r1:f64;imm:i32;
+  begin
+    case inlinenumber of
+      in_x86_cmpsd: begin op:=A_cmpsd; end;
+      else
+        Internalerror(2020010201);
+    end;
+
+    GetParameters(3);
+
+    for i := 1 to 3 do secondpass(paraarray[i]);
+
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[1].location, false);
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[2].location, true);
+    location:=paraarray[1].location;
+    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(op,S_NO,GetConstInt(paraarray[3]),paraarray[2].location.register,paraarray[1].location.register));
+  end;
 in_x86_cmpsd_from_mem
-: //var r0:xmm;r1:ptr64;imm:i32;
+: //var r0:f64;r1:ptr64;imm:i32;
   begin
     case inlinenumber of
       in_x86_cmpsd_from_mem: begin op:=A_cmpsd; end;
@@ -1484,7 +1572,7 @@ in_x86_cvtsd2si_from_mem
     current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,paraarray[2].location.reference,paraarray[1].location.register));
   end;
 in_x86_cvtsi2sd
-: //var r0:xmm;r1:r32;
+: //var r0:f64;r1:r32;
   begin
     case inlinenumber of
       in_x86_cvtsi2sd: begin op:=A_cvtsi2sd; end;
@@ -1501,6 +1589,26 @@ in_x86_cvtsi2sd
     location:=paraarray[1].location;
     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,paraarray[2].location.register,paraarray[1].location.register));
   end;
+in_x86_cvtsi2sd_from_mem
+,in_x86_cvtss2sd_from_mem
+: //var r0:f64;r1:ptr32;
+  begin
+    case inlinenumber of
+      in_x86_cvtss2sd_from_mem: begin op:=A_cvtss2sd end;
+      in_x86_cvtsi2sd_from_mem: begin op:=A_cvtsi2sd; end;
+      else
+        Internalerror(2020010201);
+    end;
+
+    GetParameters(2);
+
+    for i := 1 to 2 do secondpass(paraarray[i]);
+
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[1].location, false);
+    location_make_ref(paraarray[2].location);
+    location:=paraarray[1].location;
+    current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,paraarray[2].location.reference,paraarray[1].location.register));
+  end;
 in_x86_movd_from_reg
 : //out r0:xmm;r1:r32;
   begin
@@ -1519,6 +1627,38 @@ in_x86_movd_from_reg
     location.register:=cg.getmmregister(current_asmdata.CurrAsmList, OS_M128);
     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,paraarray[1].location.register,location.register));
   end;
+in_x86_movq_from_mem
+,in_x86_movddup_from_mem
+,in_x86_pmovsxbw_from_mem
+,in_x86_pmovzxbw_from_mem
+,in_x86_pmovsxwd_from_mem
+,in_x86_pmovzxwd_from_mem
+,in_x86_pmovsxdq_from_mem
+,in_x86_pmovzxdq_from_mem
+: //out r0:xmm;r1:ptr64;
+  begin
+    case inlinenumber of
+      in_x86_pmovzxdq_from_mem: begin op:=A_pmovzxdq end;
+      in_x86_pmovsxdq_from_mem: begin op:=A_pmovsxdq end;
+      in_x86_pmovzxwd_from_mem: begin op:=A_pmovzxwd end;
+      in_x86_pmovsxwd_from_mem: begin op:=A_pmovsxwd end;
+      in_x86_pmovzxbw_from_mem: begin op:=A_pmovzxbw end;
+      in_x86_pmovsxbw_from_mem: begin op:=A_pmovsxbw end;
+      in_x86_movddup_from_mem: begin op:=A_movddup end;
+      in_x86_movq_from_mem: begin op:=A_movq; end;
+      else
+        Internalerror(2020010201);
+    end;
+
+    GetParameters(1);
+
+    for i := 1 to 1 do secondpass(paraarray[i]);
+
+    location_make_ref(paraarray[1].location);
+    location_reset(location,LOC_MMREGISTER,OS_M128);
+    location.register:=cg.getmmregister(current_asmdata.CurrAsmList, OS_M128);
+    current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,paraarray[1].location.reference,location.register));
+  end;
 in_x86_pmovmskb
 : //var r0:r32;r1:xmm;
   begin
@@ -1674,11 +1814,9 @@ in_x86_pshufhw
 ,in_x86_roundps
 ,in_x86_roundss
 ,in_x86_roundpd
-,in_x86_roundsd
 : //out r0:xmm;r1:xmm;imm:i32;
   begin
     case inlinenumber of
-      in_x86_roundsd: begin op:=A_roundsd end;
       in_x86_roundpd: begin op:=A_roundpd end;
       in_x86_roundss: begin op:=A_roundss end;
       in_x86_roundps: begin op:=A_roundps end;
@@ -1794,8 +1932,26 @@ in_x86_roundss_from_mem
     location.register:=cg.getmmregister(current_asmdata.CurrAsmList, OS_M128);
     current_asmdata.CurrAsmList.concat(taicpu.op_const_ref_reg(op,S_NO,GetConstInt(paraarray[2]),paraarray[1].location.reference,location.register));
   end;
+in_x86_roundsd
+: //out r0:f64;r1:f64;imm:i32;
+  begin
+    case inlinenumber of
+      in_x86_roundsd: begin op:=A_roundsd; end;
+      else
+        Internalerror(2020010201);
+    end;
+
+    GetParameters(2);
+
+    for i := 1 to 2 do secondpass(paraarray[i]);
+
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[1].location, true);
+    location_reset(location,LOC_MMREGISTER,OS_M128);
+    location.register:=cg.getmmregister(current_asmdata.CurrAsmList, OS_M128);
+    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(op,S_NO,GetConstInt(paraarray[2]),paraarray[1].location.register,location.register));
+  end;
 in_x86_roundsd_from_mem
-: //out r0:xmm;r1:ptr64;imm:i32;
+: //out r0:f64;r1:ptr64;imm:i32;
   begin
     case inlinenumber of
       in_x86_roundsd_from_mem: begin op:=A_roundsd; end;
@@ -1812,6 +1968,26 @@ in_x86_roundsd_from_mem
     location.register:=cg.getmmregister(current_asmdata.CurrAsmList, OS_M128);
     current_asmdata.CurrAsmList.concat(taicpu.op_const_ref_reg(op,S_NO,GetConstInt(paraarray[2]),paraarray[1].location.reference,location.register));
   end;
+in_x86_insertps_from_mem
+,in_x86_pinsrd_from_mem
+: //var r0:xmm;r1:ptr32;imm:i32;
+  begin
+    case inlinenumber of
+      in_x86_pinsrd_from_mem: begin op:=A_pinsrd end;
+      in_x86_insertps_from_mem: begin op:=A_insertps; end;
+      else
+        Internalerror(2020010201);
+    end;
+
+    GetParameters(3);
+
+    for i := 1 to 3 do secondpass(paraarray[i]);
+
+    location_force_mmreg(current_asmdata.CurrAsmList, paraarray[1].location, false);
+    location_make_ref(paraarray[2].location);
+    location:=paraarray[1].location;
+    current_asmdata.CurrAsmList.concat(taicpu.op_const_ref_reg(op,S_NO,GetConstInt(paraarray[3]),paraarray[2].location.reference,paraarray[1].location.register));
+  end;
 in_x86_extractps
 ,in_x86_pextrd
 : //out r0:r32;r1:xmm;imm:i32;

+ 114 - 59
compiler/x86/x86mmtype.inc

@@ -69,14 +69,7 @@ in_x86_movss_from_val
 in_x86_movlps
 ,in_x86_movhps
 ,in_x86_cvtpi2ps_from_mem
-,in_x86_addsd_from_mem
-,in_x86_divsd_from_mem
 ,in_x86_maxsd_from_mem
-,in_x86_minsd_from_mem
-,in_x86_mulsd_from_mem
-,in_x86_subsd_from_mem
-,in_x86_comisd_from_mem
-,in_x86_ucomisd_from_mem
 ,in_x86_cvtdq2pd_from_mem
 ,in_x86_cvtpi2pd_from_mem
 ,in_x86_cvtps2pd_from_mem
@@ -96,15 +89,6 @@ in_x86_movlps_to_mem
   end;
 in_x86_movlhps
 ,in_x86_movhlps
-,in_x86_addss
-,in_x86_subss
-,in_x86_mulss
-,in_x86_divss
-,in_x86_rcpss
-,in_x86_sqrtss
-,in_x86_maxss
-,in_x86_minss
-,in_x86_rsqrtss
 ,in_x86_addps
 ,in_x86_subps
 ,in_x86_mulps
@@ -121,23 +105,15 @@ in_x86_movlhps
 ,in_x86_unpckhps
 ,in_x86_unpcklps
 ,in_x86_addpd
-,in_x86_addsd
 ,in_x86_divpd
-,in_x86_divsd
 ,in_x86_maxpd
-,in_x86_maxsd
 ,in_x86_minpd
-,in_x86_minsd
 ,in_x86_mulpd
-,in_x86_mulsd
 ,in_x86_subpd
-,in_x86_subsd
 ,in_x86_andpd
 ,in_x86_andnpd
 ,in_x86_orpd
 ,in_x86_xorpd
-,in_x86_comisd
-,in_x86_ucomisd
 ,in_x86_unpckhpd
 ,in_x86_unpcklpd
 ,in_x86_cvtdq2pd
@@ -147,7 +123,6 @@ in_x86_movlhps
 ,in_x86_cvtps2dq
 ,in_x86_cvtps2pd
 ,in_x86_cvtsd2ss
-,in_x86_cvtss2sd
 ,in_x86_cvttpd2dq
 ,in_x86_cvttps2dq
 ,in_x86_packssdw
@@ -245,6 +220,20 @@ in_x86_movlhps
     CheckParameters(2);
     resultdef:=x86_m128type;
   end;
+in_x86_addss
+,in_x86_subss
+,in_x86_mulss
+,in_x86_divss
+,in_x86_rcpss
+,in_x86_sqrtss
+,in_x86_maxss
+,in_x86_minss
+,in_x86_rsqrtss
+: //var r0:f32;r1:f32;
+  begin
+    CheckParameters(2);
+    resultdef:=s32floattype;
+  end;
 in_x86_addss_from_mem
 ,in_x86_subss_from_mem
 ,in_x86_mulss_from_mem
@@ -255,12 +244,10 @@ in_x86_addss_from_mem
 ,in_x86_minss_from_mem
 ,in_x86_rsqrtss_from_mem
 ,in_x86_cvtsi2ss_from_mem
-,in_x86_cvtsi2sd_from_mem
-,in_x86_cvtss2sd_from_mem
-: //var r0:xmm;r1:ptr32;
+: //var r0:f32;r1:ptr32;
   begin
     CheckParameters(2);
-    resultdef:=x86_m128type;
+    resultdef:=s32floattype;
   end;
 in_x86_addps_from_mem
 ,in_x86_subps_from_mem
@@ -391,10 +378,20 @@ in_x86_addps_from_mem
     resultdef:=x86_m128type;
   end;
 in_x86_cmpss
-,in_x86_cmpps
+: //var r0:f32;r1:f32;imm:i32;
+  begin
+    CheckParameters(3);
+    resultdef:=s32floattype;
+  end;
+in_x86_cmpss_from_mem
+: //var r0:f32;r1:ptr32;imm:i32;
+  begin
+    CheckParameters(3);
+    resultdef:=s32floattype;
+  end;
+in_x86_cmpps
 ,in_x86_shufps
 ,in_x86_cmppd
-,in_x86_cmpsd
 ,in_x86_shufpd
 ,in_x86_palignr
 ,in_x86_dpps
@@ -413,14 +410,6 @@ in_x86_cmpss
     CheckParameters(3);
     resultdef:=x86_m128type;
   end;
-in_x86_cmpss_from_mem
-,in_x86_insertps_from_mem
-,in_x86_pinsrd_from_mem
-: //var r0:xmm;r1:ptr32;imm:i32;
-  begin
-    CheckParameters(3);
-    resultdef:=x86_m128type;
-  end;
 in_x86_cmpps_from_mem
 ,in_x86_shufps_from_mem
 ,in_x86_cmppd_from_mem
@@ -442,14 +431,14 @@ in_x86_cmpps_from_mem
     resultdef:=x86_m128type;
   end;
 in_x86_cvtsi2ss
-: //var r0:xmm;r1:reg;
+: //var r0:f32;r1:reg;
   begin
     CheckParameters(2);
-    resultdef:=x86_m128type;
+    resultdef:=s32floattype;
   end;
 in_x86_cvtss2si
 ,in_x86_cvttss2si
-: //out r0:reg;r1:xmm;
+: //out r0:reg;r1:f32;
   begin
     CheckParameters(1);
     resultdef:=uinttype;
@@ -558,8 +547,38 @@ in_x86_movsd_from_val
     CheckParameters(1);
     resultdef:=x86_m128type;
   end;
+in_x86_addsd
+,in_x86_divsd
+,in_x86_minsd
+,in_x86_subsd
+,in_x86_comisd
+,in_x86_ucomisd
+,in_x86_cvtss2sd
+: //var r0:f64;r1:f64;
+  begin
+    CheckParameters(2);
+    resultdef:=s64floattype;
+  end;
+in_x86_addsd_from_mem
+,in_x86_divsd_from_mem
+,in_x86_minsd_from_mem
+,in_x86_mulsd_from_mem
+,in_x86_subsd_from_mem
+,in_x86_comisd_from_mem
+,in_x86_ucomisd_from_mem
+: //var r0:f64;r1:ptr64;
+  begin
+    CheckParameters(2);
+    resultdef:=s64floattype;
+  end;
+in_x86_maxsd
+,in_x86_mulsd
+: //var r0:f64;r1:xmm;
+  begin
+    CheckParameters(2);
+    resultdef:=s64floattype;
+  end;
 in_x86_sqrtpd
-,in_x86_sqrtsd
 ,in_x86_movddup
 ,in_x86_movsldup
 ,in_x86_movshdup
@@ -583,25 +602,29 @@ in_x86_sqrtpd
     CheckParameters(1);
     resultdef:=x86_m128type;
   end;
+in_x86_sqrtsd
+: //out r0:f64;r1:f64;
+  begin
+    CheckParameters(1);
+    resultdef:=s64floattype;
+  end;
 in_x86_sqrtsd_from_mem
-,in_x86_movq_from_mem
-,in_x86_movddup_from_mem
-,in_x86_pmovsxbw_from_mem
-,in_x86_pmovzxbw_from_mem
-,in_x86_pmovsxwd_from_mem
-,in_x86_pmovzxwd_from_mem
-,in_x86_pmovsxdq_from_mem
-,in_x86_pmovzxdq_from_mem
-: //out r0:xmm;r1:ptr64;
+: //out r0:f64;r1:ptr64;
   begin
     CheckParameters(1);
-    resultdef:=x86_m128type;
+    resultdef:=s64floattype;
+  end;
+in_x86_cmpsd
+: //var r0:f64;r1:f64;imm:i32;
+  begin
+    CheckParameters(3);
+    resultdef:=s64floattype;
   end;
 in_x86_cmpsd_from_mem
-: //var r0:xmm;r1:ptr64;imm:i32;
+: //var r0:f64;r1:ptr64;imm:i32;
   begin
     CheckParameters(3);
-    resultdef:=x86_m128type;
+    resultdef:=s64floattype;
   end;
 in_x86_cvtpd2pi
 ,in_x86_cvttpd2pi
@@ -632,10 +655,17 @@ in_x86_cvtsd2si_from_mem
     resultdef:=sinttype;
   end;
 in_x86_cvtsi2sd
-: //var r0:xmm;r1:r32;
+: //var r0:f64;r1:r32;
   begin
     CheckParameters(2);
-    resultdef:=x86_m128type;
+    resultdef:=s64floattype;
+  end;
+in_x86_cvtsi2sd_from_mem
+,in_x86_cvtss2sd_from_mem
+: //var r0:f64;r1:ptr32;
+  begin
+    CheckParameters(2);
+    resultdef:=s64floattype;
   end;
 in_x86_movd_from_reg
 : //out r0:xmm;r1:r32;
@@ -643,6 +673,19 @@ in_x86_movd_from_reg
     CheckParameters(1);
     resultdef:=x86_m128type;
   end;
+in_x86_movq_from_mem
+,in_x86_movddup_from_mem
+,in_x86_pmovsxbw_from_mem
+,in_x86_pmovzxbw_from_mem
+,in_x86_pmovsxwd_from_mem
+,in_x86_pmovzxwd_from_mem
+,in_x86_pmovsxdq_from_mem
+,in_x86_pmovzxdq_from_mem
+: //out r0:xmm;r1:ptr64;
+  begin
+    CheckParameters(1);
+    resultdef:=x86_m128type;
+  end;
 in_x86_pmovmskb
 : //var r0:r32;r1:xmm;
   begin
@@ -702,7 +745,6 @@ in_x86_pshufhw
 ,in_x86_roundps
 ,in_x86_roundss
 ,in_x86_roundpd
-,in_x86_roundsd
 : //out r0:xmm;r1:xmm;imm:i32;
   begin
     CheckParameters(2);
@@ -740,10 +782,23 @@ in_x86_roundss_from_mem
     CheckParameters(2);
     resultdef:=x86_m128type;
   end;
+in_x86_roundsd
+: //out r0:f64;r1:f64;imm:i32;
+  begin
+    CheckParameters(2);
+    resultdef:=s64floattype;
+  end;
 in_x86_roundsd_from_mem
-: //out r0:xmm;r1:ptr64;imm:i32;
+: //out r0:f64;r1:ptr64;imm:i32;
   begin
     CheckParameters(2);
+    resultdef:=s64floattype;
+  end;
+in_x86_insertps_from_mem
+,in_x86_pinsrd_from_mem
+: //var r0:xmm;r1:ptr32;imm:i32;
+  begin
+    CheckParameters(3);
     resultdef:=x86_m128type;
   end;
 in_x86_extractps

+ 49 - 49
rtl/i386/cpummprocs.inc

@@ -12,24 +12,24 @@ procedure x86_movlps(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movlps_to
 procedure x86_movhps(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movhps_to_mem];
 function x86_movlhps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movlhps];
 function x86_movhlps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movhlps];
-function x86_addss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addss];
-function x86_addss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addss_from_mem];
-function x86_subss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subss];
-function x86_subss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_subss_from_mem];
-function x86_mulss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulss];
-function x86_mulss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_mulss_from_mem];
-function x86_divss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divss];
-function x86_divss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_divss_from_mem];
-function x86_rcpss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rcpss];
-function x86_rcpss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_rcpss_from_mem];
-function x86_sqrtss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtss];
-function x86_sqrtss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_sqrtss_from_mem];
-function x86_maxss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxss];
-function x86_maxss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_maxss_from_mem];
-function x86_minss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minss];
-function x86_minss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_minss_from_mem];
-function x86_rsqrtss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rsqrtss];
-function x86_rsqrtss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_rsqrtss_from_mem];
+function x86_addss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_addss];
+function x86_addss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_addss_from_mem];
+function x86_subss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_subss];
+function x86_subss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_subss_from_mem];
+function x86_mulss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_mulss];
+function x86_mulss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_mulss_from_mem];
+function x86_divss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_divss];
+function x86_divss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_divss_from_mem];
+function x86_rcpss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_rcpss];
+function x86_rcpss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_rcpss_from_mem];
+function x86_sqrtss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_sqrtss];
+function x86_sqrtss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_sqrtss_from_mem];
+function x86_maxss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_maxss];
+function x86_maxss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_maxss_from_mem];
+function x86_minss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_minss];
+function x86_minss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_minss_from_mem];
+function x86_rsqrtss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_rsqrtss];
+function x86_rsqrtss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_rsqrtss_from_mem];
 function x86_addps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addps];
 function x86_addps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addps_from_mem];
 function x86_subps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subps];
@@ -56,8 +56,8 @@ function x86_xorps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_xorps];
 function x86_xorps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_xorps_from_mem];
 function x86_andnps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andnps];
 function x86_andnps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_andnps_from_mem];
-function x86_cmpss(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpss];
-function x86_cmpss(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpss_from_mem];
+function x86_cmpss(r0, r1: single; imm: longint): single; [INTERNPROC: fpc_in_x86_cmpss];
+function x86_cmpss(r0: single; r1: pointer; imm: longint): single; [INTERNPROC: fpc_in_x86_cmpss_from_mem];
 function x86_cmpps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpps];
 function x86_cmpps(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpps_from_mem];
 function x86_shufps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufps];
@@ -66,11 +66,11 @@ function x86_unpckhps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpckhps]
 function x86_unpckhps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_unpckhps_from_mem];
 function x86_unpcklps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpcklps];
 function x86_unpcklps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_unpcklps_from_mem];
-function x86_cvtsi2ss(r0: __m128; r1: NativeUInt): __m128; [INTERNPROC: fpc_in_x86_cvtsi2ss];
-function x86_cvtsi2ss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtsi2ss_from_mem];
-function x86_cvtss2si(r1: __m128): NativeUInt; [INTERNPROC: fpc_in_x86_cvtss2si];
+function x86_cvtsi2ss(r0: single; r1: NativeUInt): single; [INTERNPROC: fpc_in_x86_cvtsi2ss];
+function x86_cvtsi2ss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_cvtsi2ss_from_mem];
+function x86_cvtss2si(r1: single): NativeUInt; [INTERNPROC: fpc_in_x86_cvtss2si];
 function x86_cvtss2si(r1: pointer): NativeUInt; [INTERNPROC: fpc_in_x86_cvtss2si_from_mem];
-function x86_cvttss2si(r1: __m128): NativeUInt; [INTERNPROC: fpc_in_x86_cvttss2si];
+function x86_cvttss2si(r1: single): NativeUInt; [INTERNPROC: fpc_in_x86_cvttss2si];
 function x86_cvttss2si(r1: pointer): NativeUInt; [INTERNPROC: fpc_in_x86_cvttss2si_from_mem];
 function x86_cvtpi2ps(r0: __m128; r1: __m64): __m128; [INTERNPROC: fpc_in_x86_cvtpi2ps];
 function x86_cvtpi2ps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtpi2ps_from_mem];
@@ -115,32 +115,32 @@ function x86_movsd(r1: __m128): double; [INTERNPROC: fpc_in_x86_movsd_to_val];
 function x86_movsd(r1: double): __m128; [INTERNPROC: fpc_in_x86_movsd_from_val];
 function x86_addpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addpd];
 function x86_addpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addpd_from_mem];
-function x86_addsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addsd];
-function x86_addsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addsd_from_mem];
+function x86_addsd(r0, r1: double): double; [INTERNPROC: fpc_in_x86_addsd];
+function x86_addsd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_addsd_from_mem];
 function x86_divpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divpd];
 function x86_divpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_divpd_from_mem];
-function x86_divsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divsd];
-function x86_divsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_divsd_from_mem];
+function x86_divsd(r0, r1: double): double; [INTERNPROC: fpc_in_x86_divsd];
+function x86_divsd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_divsd_from_mem];
 function x86_maxpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxpd];
 function x86_maxpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_maxpd_from_mem];
-function x86_maxsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxsd];
+function x86_maxsd(r0: double; r1: __m128): double; [INTERNPROC: fpc_in_x86_maxsd];
 function x86_maxsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_maxsd_from_mem];
 function x86_minpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minpd];
 function x86_minpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_minpd_from_mem];
-function x86_minsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minsd];
-function x86_minsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_minsd_from_mem];
+function x86_minsd(r0, r1: double): double; [INTERNPROC: fpc_in_x86_minsd];
+function x86_minsd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_minsd_from_mem];
 function x86_mulpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulpd];
 function x86_mulpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_mulpd_from_mem];
-function x86_mulsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulsd];
-function x86_mulsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_mulsd_from_mem];
+function x86_mulsd(r0: double; r1: __m128): double; [INTERNPROC: fpc_in_x86_mulsd];
+function x86_mulsd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_mulsd_from_mem];
 function x86_sqrtpd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtpd];
 function x86_sqrtpd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_sqrtpd_from_mem];
-function x86_sqrtsd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtsd];
-function x86_sqrtsd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_sqrtsd_from_mem];
+function x86_sqrtsd(r1: double): double; [INTERNPROC: fpc_in_x86_sqrtsd];
+function x86_sqrtsd(r1: pointer): double; [INTERNPROC: fpc_in_x86_sqrtsd_from_mem];
 function x86_subpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subpd];
 function x86_subpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_subpd_from_mem];
-function x86_subsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subsd];
-function x86_subsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_subsd_from_mem];
+function x86_subsd(r0, r1: double): double; [INTERNPROC: fpc_in_x86_subsd];
+function x86_subsd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_subsd_from_mem];
 function x86_andpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andpd];
 function x86_andpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_andpd_from_mem];
 function x86_andnpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andnpd];
@@ -151,12 +151,12 @@ function x86_xorpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_xorpd];
 function x86_xorpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_xorpd_from_mem];
 function x86_cmppd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmppd];
 function x86_cmppd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmppd_from_mem];
-function x86_cmpsd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpsd];
-function x86_cmpsd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpsd_from_mem];
-function x86_comisd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_comisd];
-function x86_comisd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_comisd_from_mem];
-function x86_ucomisd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_ucomisd];
-function x86_ucomisd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_ucomisd_from_mem];
+function x86_cmpsd(r0, r1: double; imm: longint): double; [INTERNPROC: fpc_in_x86_cmpsd];
+function x86_cmpsd(r0: double; r1: pointer; imm: longint): double; [INTERNPROC: fpc_in_x86_cmpsd_from_mem];
+function x86_comisd(r0, r1: double): double; [INTERNPROC: fpc_in_x86_comisd];
+function x86_comisd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_comisd_from_mem];
+function x86_ucomisd(r0, r1: double): double; [INTERNPROC: fpc_in_x86_ucomisd];
+function x86_ucomisd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_ucomisd_from_mem];
 function x86_shufpd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufpd];
 function x86_shufpd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufpd_from_mem];
 function x86_unpckhpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpckhpd];
@@ -183,10 +183,10 @@ function x86_cvtsd2si(r0: NativeInt; r1: __m128): NativeInt; [INTERNPROC: fpc_in
 function x86_cvtsd2si(r0: NativeInt; r1: pointer): NativeInt; [INTERNPROC: fpc_in_x86_cvtsd2si_from_mem];
 function x86_cvtsd2ss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtsd2ss];
 function x86_cvtsd2ss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtsd2ss_from_mem];
-function x86_cvtsi2sd(r0: __m128; r1: longword): __m128; [INTERNPROC: fpc_in_x86_cvtsi2sd];
-function x86_cvtsi2sd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtsi2sd_from_mem];
-function x86_cvtss2sd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtss2sd];
-function x86_cvtss2sd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtss2sd_from_mem];
+function x86_cvtsi2sd(r0: double; r1: longword): double; [INTERNPROC: fpc_in_x86_cvtsi2sd];
+function x86_cvtsi2sd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_cvtsi2sd_from_mem];
+function x86_cvtss2sd(r0, r1: double): double; [INTERNPROC: fpc_in_x86_cvtss2sd];
+function x86_cvtss2sd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_cvtss2sd_from_mem];
 function x86_cvttpd2dq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvttpd2dq];
 function x86_cvttpd2dq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvttpd2dq_from_mem];
 function x86_cvttpd2pi(r0: __m64; r1: __m128): __m64; [INTERNPROC: fpc_in_x86_cvttpd2pi];
@@ -412,8 +412,8 @@ function x86_roundss(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_
 function x86_roundss(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundss_from_mem];
 function x86_roundpd(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundpd];
 function x86_roundpd(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundpd_from_mem];
-function x86_roundsd(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundsd];
-function x86_roundsd(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundsd_from_mem];
+function x86_roundsd(r1: double; imm: longint): double; [INTERNPROC: fpc_in_x86_roundsd];
+function x86_roundsd(r1: pointer; imm: longint): double; [INTERNPROC: fpc_in_x86_roundsd_from_mem];
 function x86_insertps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_insertps];
 function x86_insertps(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_insertps_from_mem];
 function x86_extractps(r1: __m128; imm: longint): longword; [INTERNPROC: fpc_in_x86_extractps];

+ 46 - 47
rtl/x86_64/cpumminnr.inc

@@ -454,50 +454,49 @@
   fpc_in_x86_pinsrq_from_mem = fpc_in_x86_mm_first+453;
   fpc_in_x86_pextrb = fpc_in_x86_mm_first+454;
   fpc_in_x86_pextrb_to_mem = fpc_in_x86_mm_first+455;
-  fpc_in_x86_pextrw_sse41 = fpc_in_x86_mm_first+456;
-  fpc_in_x86_pextrw_sse41_to_mem = fpc_in_x86_mm_first+457;
-  fpc_in_x86_pextrd = fpc_in_x86_mm_first+458;
-  fpc_in_x86_pextrd_to_mem = fpc_in_x86_mm_first+459;
-  fpc_in_x86_pextrq = fpc_in_x86_mm_first+460;
-  fpc_in_x86_pextrq_to_mem = fpc_in_x86_mm_first+461;
-  fpc_in_x86_pmovsxbw = fpc_in_x86_mm_first+462;
-  fpc_in_x86_pmovsxbw_from_mem = fpc_in_x86_mm_first+463;
-  fpc_in_x86_pmovzxbw = fpc_in_x86_mm_first+464;
-  fpc_in_x86_pmovzxbw_from_mem = fpc_in_x86_mm_first+465;
-  fpc_in_x86_pmovsxbd = fpc_in_x86_mm_first+466;
-  fpc_in_x86_pmovsxbd_from_mem = fpc_in_x86_mm_first+467;
-  fpc_in_x86_pmovzxbd = fpc_in_x86_mm_first+468;
-  fpc_in_x86_pmovzxbd_from_mem = fpc_in_x86_mm_first+469;
-  fpc_in_x86_pmovsxbq = fpc_in_x86_mm_first+470;
-  fpc_in_x86_pmovsxbq_from_mem = fpc_in_x86_mm_first+471;
-  fpc_in_x86_pmovzxbq = fpc_in_x86_mm_first+472;
-  fpc_in_x86_pmovzxbq_from_mem = fpc_in_x86_mm_first+473;
-  fpc_in_x86_pmovsxwd = fpc_in_x86_mm_first+474;
-  fpc_in_x86_pmovsxwd_from_mem = fpc_in_x86_mm_first+475;
-  fpc_in_x86_pmovzxwd = fpc_in_x86_mm_first+476;
-  fpc_in_x86_pmovzxwd_from_mem = fpc_in_x86_mm_first+477;
-  fpc_in_x86_pmovsxwq = fpc_in_x86_mm_first+478;
-  fpc_in_x86_pmovsxwq_from_mem = fpc_in_x86_mm_first+479;
-  fpc_in_x86_pmovzxwq = fpc_in_x86_mm_first+480;
-  fpc_in_x86_pmovzxwq_from_mem = fpc_in_x86_mm_first+481;
-  fpc_in_x86_pmovsxdq = fpc_in_x86_mm_first+482;
-  fpc_in_x86_pmovsxdq_from_mem = fpc_in_x86_mm_first+483;
-  fpc_in_x86_pmovzxdq = fpc_in_x86_mm_first+484;
-  fpc_in_x86_pmovzxdq_from_mem = fpc_in_x86_mm_first+485;
-  fpc_in_x86_ptest = fpc_in_x86_mm_first+486;
-  fpc_in_x86_ptest_from_mem = fpc_in_x86_mm_first+487;
-  fpc_in_x86_pcmpeqq = fpc_in_x86_mm_first+488;
-  fpc_in_x86_pcmpeqq_from_mem = fpc_in_x86_mm_first+489;
-  fpc_in_x86_packusdw = fpc_in_x86_mm_first+490;
-  fpc_in_x86_packusdw_from_mem = fpc_in_x86_mm_first+491;
-  fpc_in_x86_movntdqa = fpc_in_x86_mm_first+492;
-  fpc_in_x86_pcmpestri = fpc_in_x86_mm_first+493;
-  fpc_in_x86_pcmpestri_from_mem = fpc_in_x86_mm_first+494;
-  fpc_in_x86_pcmpestrm = fpc_in_x86_mm_first+495;
-  fpc_in_x86_pcmpestrm_from_mem = fpc_in_x86_mm_first+496;
-  fpc_in_x86_pcmpistri = fpc_in_x86_mm_first+497;
-  fpc_in_x86_pcmpistri_from_mem = fpc_in_x86_mm_first+498;
-  fpc_in_x86_pcmpistrm = fpc_in_x86_mm_first+499;
-  fpc_in_x86_pcmpistrm_from_mem = fpc_in_x86_mm_first+500;
-  fpc_in_x86_pcmpgtq = fpc_in_x86_mm_first+501;
-  fpc_in_x86_pcmpgtq_from_mem = fpc_in_x86_mm_first+502;
+  fpc_in_x86_pextrw_sse41_to_mem = fpc_in_x86_mm_first+456;
+  fpc_in_x86_pextrd = fpc_in_x86_mm_first+457;
+  fpc_in_x86_pextrd_to_mem = fpc_in_x86_mm_first+458;
+  fpc_in_x86_pextrq = fpc_in_x86_mm_first+459;
+  fpc_in_x86_pextrq_to_mem = fpc_in_x86_mm_first+460;
+  fpc_in_x86_pmovsxbw = fpc_in_x86_mm_first+461;
+  fpc_in_x86_pmovsxbw_from_mem = fpc_in_x86_mm_first+462;
+  fpc_in_x86_pmovzxbw = fpc_in_x86_mm_first+463;
+  fpc_in_x86_pmovzxbw_from_mem = fpc_in_x86_mm_first+464;
+  fpc_in_x86_pmovsxbd = fpc_in_x86_mm_first+465;
+  fpc_in_x86_pmovsxbd_from_mem = fpc_in_x86_mm_first+466;
+  fpc_in_x86_pmovzxbd = fpc_in_x86_mm_first+467;
+  fpc_in_x86_pmovzxbd_from_mem = fpc_in_x86_mm_first+468;
+  fpc_in_x86_pmovsxbq = fpc_in_x86_mm_first+469;
+  fpc_in_x86_pmovsxbq_from_mem = fpc_in_x86_mm_first+470;
+  fpc_in_x86_pmovzxbq = fpc_in_x86_mm_first+471;
+  fpc_in_x86_pmovzxbq_from_mem = fpc_in_x86_mm_first+472;
+  fpc_in_x86_pmovsxwd = fpc_in_x86_mm_first+473;
+  fpc_in_x86_pmovsxwd_from_mem = fpc_in_x86_mm_first+474;
+  fpc_in_x86_pmovzxwd = fpc_in_x86_mm_first+475;
+  fpc_in_x86_pmovzxwd_from_mem = fpc_in_x86_mm_first+476;
+  fpc_in_x86_pmovsxwq = fpc_in_x86_mm_first+477;
+  fpc_in_x86_pmovsxwq_from_mem = fpc_in_x86_mm_first+478;
+  fpc_in_x86_pmovzxwq = fpc_in_x86_mm_first+479;
+  fpc_in_x86_pmovzxwq_from_mem = fpc_in_x86_mm_first+480;
+  fpc_in_x86_pmovsxdq = fpc_in_x86_mm_first+481;
+  fpc_in_x86_pmovsxdq_from_mem = fpc_in_x86_mm_first+482;
+  fpc_in_x86_pmovzxdq = fpc_in_x86_mm_first+483;
+  fpc_in_x86_pmovzxdq_from_mem = fpc_in_x86_mm_first+484;
+  fpc_in_x86_ptest = fpc_in_x86_mm_first+485;
+  fpc_in_x86_ptest_from_mem = fpc_in_x86_mm_first+486;
+  fpc_in_x86_pcmpeqq = fpc_in_x86_mm_first+487;
+  fpc_in_x86_pcmpeqq_from_mem = fpc_in_x86_mm_first+488;
+  fpc_in_x86_packusdw = fpc_in_x86_mm_first+489;
+  fpc_in_x86_packusdw_from_mem = fpc_in_x86_mm_first+490;
+  fpc_in_x86_movntdqa = fpc_in_x86_mm_first+491;
+  fpc_in_x86_pcmpestri = fpc_in_x86_mm_first+492;
+  fpc_in_x86_pcmpestri_from_mem = fpc_in_x86_mm_first+493;
+  fpc_in_x86_pcmpestrm = fpc_in_x86_mm_first+494;
+  fpc_in_x86_pcmpestrm_from_mem = fpc_in_x86_mm_first+495;
+  fpc_in_x86_pcmpistri = fpc_in_x86_mm_first+496;
+  fpc_in_x86_pcmpistri_from_mem = fpc_in_x86_mm_first+497;
+  fpc_in_x86_pcmpistrm = fpc_in_x86_mm_first+498;
+  fpc_in_x86_pcmpistrm_from_mem = fpc_in_x86_mm_first+499;
+  fpc_in_x86_pcmpgtq = fpc_in_x86_mm_first+500;
+  fpc_in_x86_pcmpgtq_from_mem = fpc_in_x86_mm_first+501;

+ 57 - 50
rtl/x86_64/cpummprocs.inc

@@ -12,24 +12,24 @@ procedure x86_movlps(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movlps_to
 procedure x86_movhps(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movhps_to_mem];
 function x86_movlhps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movlhps];
 function x86_movhlps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movhlps];
-function x86_addss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addss];
-function x86_addss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addss_from_mem];
-function x86_subss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subss];
-function x86_subss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_subss_from_mem];
-function x86_mulss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulss];
-function x86_mulss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_mulss_from_mem];
-function x86_divss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divss];
-function x86_divss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_divss_from_mem];
-function x86_rcpss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rcpss];
-function x86_rcpss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_rcpss_from_mem];
-function x86_sqrtss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtss];
-function x86_sqrtss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_sqrtss_from_mem];
-function x86_maxss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxss];
-function x86_maxss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_maxss_from_mem];
-function x86_minss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minss];
-function x86_minss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_minss_from_mem];
-function x86_rsqrtss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rsqrtss];
-function x86_rsqrtss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_rsqrtss_from_mem];
+function x86_addss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_addss];
+function x86_addss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_addss_from_mem];
+function x86_subss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_subss];
+function x86_subss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_subss_from_mem];
+function x86_mulss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_mulss];
+function x86_mulss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_mulss_from_mem];
+function x86_divss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_divss];
+function x86_divss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_divss_from_mem];
+function x86_rcpss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_rcpss];
+function x86_rcpss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_rcpss_from_mem];
+function x86_sqrtss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_sqrtss];
+function x86_sqrtss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_sqrtss_from_mem];
+function x86_maxss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_maxss];
+function x86_maxss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_maxss_from_mem];
+function x86_minss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_minss];
+function x86_minss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_minss_from_mem];
+function x86_rsqrtss(r0, r1: single): single; [INTERNPROC: fpc_in_x86_rsqrtss];
+function x86_rsqrtss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_rsqrtss_from_mem];
 function x86_addps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addps];
 function x86_addps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addps_from_mem];
 function x86_subps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subps];
@@ -56,8 +56,8 @@ function x86_xorps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_xorps];
 function x86_xorps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_xorps_from_mem];
 function x86_andnps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andnps];
 function x86_andnps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_andnps_from_mem];
-function x86_cmpss(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpss];
-function x86_cmpss(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpss_from_mem];
+function x86_cmpss(r0, r1: single; imm: longint): single; [INTERNPROC: fpc_in_x86_cmpss];
+function x86_cmpss(r0: single; r1: pointer; imm: longint): single; [INTERNPROC: fpc_in_x86_cmpss_from_mem];
 function x86_cmpps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpps];
 function x86_cmpps(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpps_from_mem];
 function x86_shufps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufps];
@@ -66,11 +66,11 @@ function x86_unpckhps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpckhps]
 function x86_unpckhps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_unpckhps_from_mem];
 function x86_unpcklps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpcklps];
 function x86_unpcklps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_unpcklps_from_mem];
-function x86_cvtsi2ss(r0: __m128; r1: NativeUInt): __m128; [INTERNPROC: fpc_in_x86_cvtsi2ss];
-function x86_cvtsi2ss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtsi2ss_from_mem];
-function x86_cvtss2si(r1: __m128): NativeUInt; [INTERNPROC: fpc_in_x86_cvtss2si];
+function x86_cvtsi2ss(r0: single; r1: NativeUInt): single; [INTERNPROC: fpc_in_x86_cvtsi2ss];
+function x86_cvtsi2ss(r0: single; r1: pointer): single; [INTERNPROC: fpc_in_x86_cvtsi2ss_from_mem];
+function x86_cvtss2si(r1: single): NativeUInt; [INTERNPROC: fpc_in_x86_cvtss2si];
 function x86_cvtss2si(r1: pointer): NativeUInt; [INTERNPROC: fpc_in_x86_cvtss2si_from_mem];
-function x86_cvttss2si(r1: __m128): NativeUInt; [INTERNPROC: fpc_in_x86_cvttss2si];
+function x86_cvttss2si(r1: single): NativeUInt; [INTERNPROC: fpc_in_x86_cvttss2si];
 function x86_cvttss2si(r1: pointer): NativeUInt; [INTERNPROC: fpc_in_x86_cvttss2si_from_mem];
 function x86_cvtpi2ps(r0: __m128; r1: __m64): __m128; [INTERNPROC: fpc_in_x86_cvtpi2ps];
 function x86_cvtpi2ps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtpi2ps_from_mem];
@@ -115,32 +115,32 @@ function x86_movsd(r1: __m128): double; [INTERNPROC: fpc_in_x86_movsd_to_val];
 function x86_movsd(r1: double): __m128; [INTERNPROC: fpc_in_x86_movsd_from_val];
 function x86_addpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addpd];
 function x86_addpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addpd_from_mem];
-function x86_addsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addsd];
-function x86_addsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addsd_from_mem];
+function x86_addsd(r0, r1: double): double; [INTERNPROC: fpc_in_x86_addsd];
+function x86_addsd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_addsd_from_mem];
 function x86_divpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divpd];
 function x86_divpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_divpd_from_mem];
-function x86_divsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divsd];
-function x86_divsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_divsd_from_mem];
+function x86_divsd(r0, r1: double): double; [INTERNPROC: fpc_in_x86_divsd];
+function x86_divsd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_divsd_from_mem];
 function x86_maxpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxpd];
 function x86_maxpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_maxpd_from_mem];
-function x86_maxsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxsd];
+function x86_maxsd(r0: double; r1: __m128): double; [INTERNPROC: fpc_in_x86_maxsd];
 function x86_maxsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_maxsd_from_mem];
 function x86_minpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minpd];
 function x86_minpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_minpd_from_mem];
-function x86_minsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minsd];
-function x86_minsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_minsd_from_mem];
+function x86_minsd(r0, r1: double): double; [INTERNPROC: fpc_in_x86_minsd];
+function x86_minsd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_minsd_from_mem];
 function x86_mulpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulpd];
 function x86_mulpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_mulpd_from_mem];
-function x86_mulsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulsd];
-function x86_mulsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_mulsd_from_mem];
+function x86_mulsd(r0: double; r1: __m128): double; [INTERNPROC: fpc_in_x86_mulsd];
+function x86_mulsd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_mulsd_from_mem];
 function x86_sqrtpd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtpd];
 function x86_sqrtpd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_sqrtpd_from_mem];
-function x86_sqrtsd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtsd];
-function x86_sqrtsd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_sqrtsd_from_mem];
+function x86_sqrtsd(r1: double): double; [INTERNPROC: fpc_in_x86_sqrtsd];
+function x86_sqrtsd(r1: pointer): double; [INTERNPROC: fpc_in_x86_sqrtsd_from_mem];
 function x86_subpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subpd];
 function x86_subpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_subpd_from_mem];
-function x86_subsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subsd];
-function x86_subsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_subsd_from_mem];
+function x86_subsd(r0, r1: double): double; [INTERNPROC: fpc_in_x86_subsd];
+function x86_subsd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_subsd_from_mem];
 function x86_andpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andpd];
 function x86_andpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_andpd_from_mem];
 function x86_andnpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andnpd];
@@ -151,12 +151,12 @@ function x86_xorpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_xorpd];
 function x86_xorpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_xorpd_from_mem];
 function x86_cmppd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmppd];
 function x86_cmppd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmppd_from_mem];
-function x86_cmpsd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpsd];
-function x86_cmpsd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpsd_from_mem];
-function x86_comisd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_comisd];
-function x86_comisd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_comisd_from_mem];
-function x86_ucomisd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_ucomisd];
-function x86_ucomisd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_ucomisd_from_mem];
+function x86_cmpsd(r0, r1: double; imm: longint): double; [INTERNPROC: fpc_in_x86_cmpsd]; // NOTE(review): CMPSD yields an all-ones/all-zeros 64-bit mask, returned here typed as double - confirm this is intended
+function x86_cmpsd(r0: double; r1: pointer; imm: longint): double; [INTERNPROC: fpc_in_x86_cmpsd_from_mem];
+function x86_comisd(r0, r1: double): double; [INTERNPROC: fpc_in_x86_comisd]; // NOTE(review): COMISD reports its result in EFLAGS only - confirm what the double return value carries
+function x86_comisd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_comisd_from_mem];
+function x86_ucomisd(r0, r1: double): double; [INTERNPROC: fpc_in_x86_ucomisd]; // NOTE(review): UCOMISD reports its result in EFLAGS only - confirm what the double return value carries
+function x86_ucomisd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_ucomisd_from_mem];
 function x86_shufpd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufpd];
 function x86_shufpd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufpd_from_mem];
 function x86_unpckhpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpckhpd];
@@ -183,10 +183,10 @@ function x86_cvtsd2si(r0: NativeInt; r1: __m128): NativeInt; [INTERNPROC: fpc_in
 function x86_cvtsd2si(r0: NativeInt; r1: pointer): NativeInt; [INTERNPROC: fpc_in_x86_cvtsd2si_from_mem];
 function x86_cvtsd2ss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtsd2ss];
 function x86_cvtsd2ss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtsd2ss_from_mem];
-function x86_cvtsi2sd(r0: __m128; r1: longword): __m128; [INTERNPROC: fpc_in_x86_cvtsi2sd];
-function x86_cvtsi2sd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtsi2sd_from_mem];
-function x86_cvtss2sd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtss2sd];
-function x86_cvtss2sd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtss2sd_from_mem];
+function x86_cvtsi2sd(r0: double; r1: longword): double; [INTERNPROC: fpc_in_x86_cvtsi2sd]; // NOTE(review): CVTSI2SD converts a signed integer but r1 is declared unsigned (longword) - confirm values >= 2^31 behave as intended
+function x86_cvtsi2sd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_cvtsi2sd_from_mem];
+function x86_cvtss2sd(r0: double; r1: single): double; [INTERNPROC: fpc_in_x86_cvtss2sd]; // source operand is single precision; NOTE(review): confirm the compiler-side operand type table agrees
+function x86_cvtss2sd(r0: double; r1: pointer): double; [INTERNPROC: fpc_in_x86_cvtss2sd_from_mem];
 function x86_cvttpd2dq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvttpd2dq];
 function x86_cvttpd2dq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvttpd2dq_from_mem];
 function x86_cvttpd2pi(r0: __m64; r1: __m128): __m64; [INTERNPROC: fpc_in_x86_cvttpd2pi];
@@ -412,8 +412,8 @@ function x86_roundss(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_
 function x86_roundss(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundss_from_mem];
 function x86_roundpd(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundpd];
 function x86_roundpd(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundpd_from_mem];
-function x86_roundsd(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundsd];
-function x86_roundsd(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundsd_from_mem];
+function x86_roundsd(r1: double; imm: longint): double; [INTERNPROC: fpc_in_x86_roundsd];
+function x86_roundsd(r1: pointer; imm: longint): double; [INTERNPROC: fpc_in_x86_roundsd_from_mem];
 function x86_insertps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_insertps];
 function x86_insertps(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_insertps_from_mem];
 function x86_extractps(r1: __m128; imm: longint): longword; [INTERNPROC: fpc_in_x86_extractps];
@@ -450,16 +450,23 @@ function x86_pinsrb(r0: __m128; r1: longword; imm: longint): __m128; [INTERNPROC
 function x86_pinsrb(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrb_from_mem];
 function x86_pinsrd(r0: __m128; r1: longword; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrd];
 function x86_pinsrd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrd_from_mem];
+{$ifdef X86_64}
 function x86_pinsrq(r0: __m128; r1: NativeUInt; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrq];
+{$endif}
+{$ifdef X86_64}
 function x86_pinsrq(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrq_from_mem];
+{$endif}
 function x86_pextrb(r1: __m128; imm: longint): byte; [INTERNPROC: fpc_in_x86_pextrb];
 procedure x86_pextrb(r0: pointer; r1: __m128; imm: longint); [INTERNPROC: fpc_in_x86_pextrb_to_mem];
-//function x86_pextrw(r1: __m128; imm: longint): word; [INTERNPROC: fpc_in_x86_pextrw_sse41];
 procedure x86_pextrw(r0: pointer; r1: __m128; imm: longint); [INTERNPROC: fpc_in_x86_pextrw_sse41_to_mem];
 function x86_pextrd(r1: __m128; imm: longint): longword; [INTERNPROC: fpc_in_x86_pextrd];
 procedure x86_pextrd(r0: pointer; r1: __m128; imm: longint); [INTERNPROC: fpc_in_x86_pextrd_to_mem];
+{$ifdef X86_64}
 function x86_pextrq(r1: __m128; imm: longint): longword; [INTERNPROC: fpc_in_x86_pextrq];
+{$endif}
+{$ifdef X86_64}
 procedure x86_pextrq(r0: pointer; r1: __m128; imm: longint); [INTERNPROC: fpc_in_x86_pextrq_to_mem];
+{$endif}
 function x86_pmovsxbw(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovsxbw];
 function x86_pmovsxbw(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovsxbw_from_mem];
 function x86_pmovzxbw(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovzxbw];