Browse Source

+ use SSE for sqrt if possible
* same optimization for sqrt as for sqr (use setfloatresulttype)

git-svn-id: trunk@1266 -

florian 20 years ago
parent
commit
19e97e4da2
2 changed files with 20 additions and 4 deletions
  1. 1 2
      compiler/ninl.pas
  2. 19 2
      compiler/x86/nx86inl.pas

+ 1 - 2
compiler/ninl.pas

@@ -1966,8 +1966,7 @@ implementation
                   else
                   else
                    begin
                    begin
                      set_varstate(left,vs_used,[vsf_must_be_valid]);
                      set_varstate(left,vs_used,[vsf_must_be_valid]);
-                     inserttypeconv(left,pbestrealtype^);
-                     resulttype:=pbestrealtype^;
+                     setfloatresulttype;
                    end;
                    end;
                 end;
                 end;
 
 

+ 19 - 2
compiler/x86/nx86inl.pas

@@ -262,8 +262,25 @@ implementation
 
 
      procedure tx86inlinenode.second_sqrt_real;
      procedure tx86inlinenode.second_sqrt_real;
        begin
        begin
-         load_fpu_location;
-         emit_none(A_FSQRT,S_NO);
+         if use_sse(resulttype.def) then
+           begin
+             secondpass(left);
+             location_force_mmregscalar(exprasmlist,left.location,false);
+             location:=left.location;
+             case tfloatdef(resulttype.def).typ of
+               s32real:
+                 exprasmlist.concat(taicpu.op_reg_reg(A_SQRTSS,S_XMM,location.register,location.register));
+               s64real:
+                 exprasmlist.concat(taicpu.op_reg_reg(A_SQRTSD,S_XMM,location.register,location.register));
+               else
+                 internalerror(200510031);
+             end;
+           end
+         else
+           begin
+             load_fpu_location;
+             emit_none(A_FSQRT,S_NO);
+           end;
        end;
        end;
 
 
      procedure tx86inlinenode.second_ln_real;
      procedure tx86inlinenode.second_ln_real;