Browse Source

+ sqr uses SSE on x86 when possible
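* the result type of sqr is now the type of its argument when the argument is one of the usual float types (s32real, s64real, s80real, s128real); otherwise the argument is converted to the best real type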

git-svn-id: trunk@1265 -

florian 20 years ago
parent
commit
6371333361
2 changed files with 27 additions and 4 deletions
  1. 15 2
      compiler/ninl.pas
  2. 12 2
      compiler/x86/nx86inl.pas

+ 15 - 2
compiler/ninl.pas

@@ -1206,6 +1206,20 @@ implementation
           end;
 
 
+      procedure setfloatresulttype;
+        begin
+          if (left.resulttype.def.deftype=floatdef) and
+            (tfloatdef(left.resulttype.def).typ in [s32real,s64real,s80real,s128real]) then
+            resulttype:=left.resulttype
+          else
+            begin
+              inserttypeconv(left,pbestrealtype^);
+              resulttype:=pbestrealtype^;
+            end;
+        end;
+
+
+
       var
          vl,vl2    : TConstExprInt;
          vr        : bestreal;
@@ -1935,8 +1949,7 @@ implementation
                   else
                    begin
                      set_varstate(left,vs_used,[vsf_must_be_valid]);
-                     inserttypeconv(left,pbestrealtype^);
-                     resulttype:=pbestrealtype^;
+                     setfloatresulttype;
                    end;
                 end;
 

+ 12 - 2
compiler/x86/nx86inl.pas

@@ -246,8 +246,18 @@ implementation
      procedure tx86inlinenode.second_sqr_real;
 
        begin
-         load_fpu_location;
-         emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
+         if use_sse(resulttype.def) then
+           begin
+             secondpass(left);
+             location_force_mmregscalar(exprasmlist,left.location,false);
+             location:=left.location;
+             cg.a_opmm_loc_reg(exprasmlist,OP_MUL,left.location.size,left.location,left.location.register,mms_movescalar);
+           end
+         else
+           begin
+             load_fpu_location;
+             emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
+           end;
        end;
 
      procedure tx86inlinenode.second_sqrt_real;