Browse Source

* location.size of trunc/round must be OS_S64
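* fixed assembling of cvt*2si: dropped the leading \323 (#211) code byte from the register-operand forms of CVTSD2SI, CVTSI2SD and CVTTSD2SI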
+ optimized trunc/round for x86_64/sse
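* trunc/round: only float operands (s32real, s64real, s80real, s128real) skip the type cast to the best real type; all other operands are still converted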

git-svn-id: trunk@6859 -

florian 18 năm trước
mục cha
commit
f4840620f0

+ 3 - 3
compiler/i386/i386tab.inc

@@ -8831,7 +8831,7 @@
     opcode  : A_CVTSD2SI;
     ops     : 2;
     optypes : (ot_reg32 or ot_bits64,ot_xmmreg,ot_none);
-    code    : #211#1#242#208#2#15#45#72;
+    code    : #1#242#208#2#15#45#72;
     flags   : if_willamette or if_sse2
   ),
   (
@@ -8859,7 +8859,7 @@
     opcode  : A_CVTSI2SD;
     ops     : 2;
     optypes : (ot_xmmreg,ot_reg32 or ot_bits64,ot_none);
-    code    : #211#1#242#209#2#15#42#72;
+    code    : #1#242#209#2#15#42#72;
     flags   : if_willamette or if_sse2
   ),
   (
@@ -8929,7 +8929,7 @@
     opcode  : A_CVTTSD2SI;
     ops     : 2;
     optypes : (ot_reg32 or ot_bits64,ot_xmmreg,ot_none);
-    code    : #211#1#242#208#2#15#44#72;
+    code    : #1#242#208#2#15#44#72;
     flags   : if_willamette or if_sse2
   ),
   (

+ 8 - 6
compiler/ninl.pas

@@ -2116,9 +2116,10 @@ implementation
                   else
                     begin
                       set_varstate(left,vs_read,[vsf_must_be_valid]);
-                      { for direct rounding, no best real type cast should be necessary
-                      inserttypeconv(left,pbestrealtype^);
-                      }
+                      { for direct float rounding, no best real type cast should be necessary }
+                      if not((left.resultdef.typ=floatdef) and
+                        (tfloatdef(left.resultdef).floattype in [s32real,s64real,s80real,s128real])) then
+                        inserttypeconv(left,pbestrealtype^);
                       resultdef:=s64inttype;
                     end;
                 end;
@@ -2139,9 +2140,10 @@ implementation
                   else
                     begin
                       set_varstate(left,vs_read,[vsf_must_be_valid]);
-                      { for direct rounding, no best real type cast should be necessary
-                      inserttypeconv(left,pbestrealtype^);
-                      }
+                      { for direct float rounding, no best real type cast should be necessary }
+                      if not((left.resultdef.typ=floatdef) and
+                        (tfloatdef(left.resultdef).floattype in [s32real,s64real,s80real,s128real])) then
+                        inserttypeconv(left,pbestrealtype^);
                       resultdef:=s64inttype;
                     end;
                 end;

+ 26 - 16
compiler/x86/nx86inl.pas

@@ -284,22 +284,27 @@ implementation
        var
          href : treference;
        begin
-       {
+{$ifdef x86_64}
          if use_sse(left.resultdef) then
            begin
              secondpass(left);
              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
-             location.loc:=LOC_REFERENCE;
-             current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_const(A_PSHUFD,S_XMM,location.left.register,location.left.register))
-             current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_const(A_PSHUFD,S_XMM,location.left.register,location.left.register))
-
-             tg.GetTempTyped(current_asmdata.CurrAsmList,left.resultdef,tt_normal,location.reference);
+             location_reset(location,LOC_REGISTER,OS_S64);
+             location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
+             case left.location.size of
+               OS_F32:
+                 current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSS2SI,S_Q,left.location.register,location.register));
+               OS_F64:
+                 current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSD2SI,S_Q,left.location.register,location.register));
+               else
+                 internalerror(2007031402);
+             end;
            end
          else
-       }
+{$endif x86_64}
           begin
             load_fpu_location;
-            location_reset(location,LOC_REFERENCE,OS_64);
+            location_reset(location,LOC_REFERENCE,OS_S64);
             tg.GetTempTyped(current_asmdata.CurrAsmList,resultdef,tt_normal,location.reference);
             emit_ref(A_FISTP,S_IQ,location.reference);
             emit_none(A_FWAIT,S_NO);
@@ -313,19 +318,24 @@ implementation
          oldcw,newcw : treference;
          tempreg : tregister;
        begin
-       {
+{$ifdef x86_64}
          if use_sse(left.resultdef) then
            begin
              secondpass(left);
              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
-             location.loc:=LOC_REFERENCE;
-             current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_const(A_PSHUFD,S_XMM,location.left.register,location.left.register))
-             current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_const(A_PSHUFD,S_XMM,location.left.register,location.left.register))
-
-             tg.GetTempTyped(current_asmdata.CurrAsmList,left.resultdef,tt_normal,location.reference);
+             location_reset(location,LOC_REGISTER,OS_S64);
+             location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
+             case left.location.size of
+               OS_F32:
+                 current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSS2SI,S_Q,left.location.register,location.register));
+               OS_F64:
+                 current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSD2SI,S_Q,left.location.register,location.register));
+               else
+                 internalerror(2007031401);
+             end;
            end
          else
-       }
+{$endif x86_64}
           begin
             tg.GetTemp(current_asmdata.CurrAsmList,2,tt_normal,oldcw);
             tg.GetTemp(current_asmdata.CurrAsmList,2,tt_normal,newcw);
@@ -334,7 +344,7 @@ implementation
             emit_const_ref(A_OR,S_W,$0f00,newcw);
             load_fpu_location;
             emit_ref(A_FLDCW,S_NO,newcw);
-            location_reset(location,LOC_REFERENCE,OS_64);
+            location_reset(location,LOC_REFERENCE,OS_S64);
             tg.GetTempTyped(current_asmdata.CurrAsmList,resultdef,tt_normal,location.reference);
             emit_ref(A_FISTP,S_IQ,location.reference);
             emit_ref(A_FLDCW,S_NO,oldcw);

+ 3 - 3
compiler/x86/x86ins.dat

@@ -2909,7 +2909,7 @@ xmmreg,mem              \301\323\2\x0F\x5A\110          WILLAMETTE,SSE2
 
 [CVTSD2SI]
 (Ch_Wop2, Ch_Rop1, Ch_None)
-reg32|64,xmmreg         \323\1\xF2\320\2\x0F\x2D\110        WILLAMETTE,SSE2
+reg32|64,xmmreg         \1\xF2\320\2\x0F\x2D\110        WILLAMETTE,SSE2
 reg32|64,mem            \301\1\xF2\320\2\x0F\x2D\110    WILLAMETTE,SSE2
 
 [CVTSD2SS]
@@ -2919,7 +2919,7 @@ xmmreg,mem              \301\1\xF2\323\2\x0F\x5A\110    WILLAMETTE,SSE2
 
 [CVTSI2SD]
 (Ch_Wop2, Ch_Rop1, Ch_None)
-xmmreg,reg32|64         \323\1\xF2\321\2\x0F\x2A\110        WILLAMETTE,SSE2
+xmmreg,reg32|64         \1\xF2\321\2\x0F\x2A\110        WILLAMETTE,SSE2
 xmmreg,mem              \301\1\xF2\321\2\x0F\x2A\110    WILLAMETTE,SSE2
 
 [CVTSS2SD]
@@ -2944,7 +2944,7 @@ xmmreg,mem              \301\333\2\x0F\x5B\110          WILLAMETTE,SSE2,SM
 
 [CVTTSD2SI]
 (Ch_Wop2, Ch_Rop1, Ch_None)
-reg32|64,xmmreg         \323\1\xF2\320\2\x0F\x2C\110    WILLAMETTE,SSE2
+reg32|64,xmmreg         \1\xF2\320\2\x0F\x2C\110        WILLAMETTE,SSE2
 reg32|64,mem            \301\1\xF2\320\2\x0F\x2C\110    WILLAMETTE,SSE2
 
 [DIVPD]

+ 3 - 3
compiler/x86_64/x8664tab.inc

@@ -8789,7 +8789,7 @@
     opcode  : A_CVTSD2SI;
     ops     : 2;
     optypes : (ot_reg32 or ot_bits64,ot_xmmreg,ot_none);
-    code    : #211#1#242#208#2#15#45#72;
+    code    : #1#242#208#2#15#45#72;
     flags   : if_willamette or if_sse2
   ),
   (
@@ -8817,7 +8817,7 @@
     opcode  : A_CVTSI2SD;
     ops     : 2;
     optypes : (ot_xmmreg,ot_reg32 or ot_bits64,ot_none);
-    code    : #211#1#242#209#2#15#42#72;
+    code    : #1#242#209#2#15#42#72;
     flags   : if_willamette or if_sse2
   ),
   (
@@ -8887,7 +8887,7 @@
     opcode  : A_CVTTSD2SI;
     ops     : 2;
     optypes : (ot_reg32 or ot_bits64,ot_xmmreg,ot_none);
-    code    : #211#1#242#208#2#15#44#72;
+    code    : #1#242#208#2#15#44#72;
     flags   : if_willamette or if_sse2
   ),
   (