Преглед на файлове

Merged revisions 1265-1266 via svnmerge from
http://peter@svn.freepascal.org/svn/fpc/trunk

r1265 (florian)
+ sqr uses sse on x86 if possible
* the result type of sqr is equal to the argument in case of usual floats


r1266 (florian)
+ use sse for sqrt if possible
* same optimization for sqrt as for sqr

git-svn-id: branches/fixes_2_0@1290 -

peter преди 20 години
родител
ревизия
0b42018cd1
променени са 2 файла, в които са добавени 47 реда и са изтрити 8 реда
  1. 16 4
      compiler/ninl.pas
  2. 31 4
      compiler/x86/nx86inl.pas

+ 16 - 4
compiler/ninl.pas

@@ -1206,6 +1206,20 @@ implementation
           end;
 
 
+      procedure setfloatresulttype; { Chooses resulttype for the intrinsic from the type of its operand (left). }
+        begin
+          if (left.resulttype.def.deftype=floatdef) and
+            (tfloatdef(left.resulttype.def).typ in [s32real,s64real,s80real,s128real]) then
+            resulttype:=left.resulttype { operand is already one of the listed real types: result keeps that type, no conversion inserted }
+          else
+            begin
+              inserttypeconv(left,pbestrealtype^); { every other operand type is first converted to pbestrealtype^ }
+              resulttype:=pbestrealtype^; { and the result gets that same type }
+            end;
+        end;
+
+
+
       var
          vl,vl2    : TConstExprInt;
          vr        : bestreal;
@@ -1935,8 +1949,7 @@ implementation
                   else
                    begin
                      set_varstate(left,vs_used,[vsf_must_be_valid]);
-                     inserttypeconv(left,pbestrealtype^);
-                     resulttype:=pbestrealtype^;
+                     setfloatresulttype;
                    end;
                 end;
 
@@ -1953,8 +1966,7 @@ implementation
                   else
                    begin
                      set_varstate(left,vs_used,[vsf_must_be_valid]);
-                     inserttypeconv(left,pbestrealtype^);
-                     resulttype:=pbestrealtype^;
+                     setfloatresulttype;
                    end;
                 end;
 

+ 31 - 4
compiler/x86/nx86inl.pas

@@ -213,14 +213,41 @@ implementation
      procedure tx86inlinenode.second_sqr_real; { Second pass for sqr on a real operand: multiplies the value by itself, via SSE or x87. }
 
        begin
-         load_fpu_location;
-         emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
+         if use_sse(resulttype.def) then { SSE path, taken when use_sse accepts the result type }
+           begin
+             secondpass(left); { run the second pass on the operand first }
+             location_force_mmregscalar(exprasmlist,left.location,false); { NOTE(review): presumably moves the operand into a scalar MM register - confirm in the helper }
+             location:=left.location; { the node's result reuses the operand's location }
+             cg.a_opmm_loc_reg(exprasmlist,OP_MUL,left.location.size,left.location,left.location.register,mms_movescalar); { OP_MUL with the operand as both source and destination register }
+           end
+         else
+           begin
+             load_fpu_location; { x87 path, same two statements as the removed lines above }
+             emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0); { FMUL with ST0 as both operands }
+           end;
        end;
 
      procedure tx86inlinenode.second_sqrt_real; { Second pass for sqrt on a real operand: emits SQRTSS/SQRTSD on the SSE path, FSQRT otherwise. }
        begin
-         load_fpu_location;
-         emit_none(A_FSQRT,S_NO);
+         if use_sse(resulttype.def) then { SSE path, taken when use_sse accepts the result type }
+           begin
+             secondpass(left); { run the second pass on the operand first }
+             location_force_mmregscalar(exprasmlist,left.location,false); { NOTE(review): presumably moves the operand into a scalar MM register - confirm in the helper }
+             location:=left.location; { the node's result reuses the operand's location }
+             case tfloatdef(resulttype.def).typ of { pick the instruction by the float type of the result }
+               s32real:
+                 exprasmlist.concat(taicpu.op_reg_reg(A_SQRTSS,S_XMM,location.register,location.register)); { same register is source and destination }
+               s64real:
+                 exprasmlist.concat(taicpu.op_reg_reg(A_SQRTSD,S_XMM,location.register,location.register)); { same register is source and destination }
+               else
+                 internalerror(200510031); { any other float type on the SSE path is reported as an internal error }
+             end;
+           end
+         else
+           begin
+             load_fpu_location; { x87 path, same two statements as the removed lines above }
+             emit_none(A_FSQRT,S_NO); { FSQRT emitted without explicit operands }
+           end;
        end;
 
      procedure tx86inlinenode.second_ln_real;